[src/trunk]: src/dist/nvi/regex reliability fix, merge libc/regex fix to avoi...

To: source-changes-hg%NetBSD.org@localhost
Subject: [src/trunk]: src/dist/nvi/regex reliability fix, merge libc/regex fix to avoi...
From: tnozaki <tnozaki%NetBSD.org@localhost>
Date: Mon, 06 Apr 2020 21:40:11 +0000
details:   https://anonhg.NetBSD.org/src/rev/3761db23df40
branches:  trunk
changeset: 771409:3761db23df40
user:      tnozaki <tnozaki%NetBSD.org@localhost>
date:      Sat Nov 19 17:45:11 2011 +0000

description:
Reliability fix: merge the libc/regex fix to avoid a memory exhaustion problem.

diffstat:

 dist/nvi/regex/engine.c  |    6 +-
 dist/nvi/regex/regcomp.c |  134 +++++++++++++++++++++++++++++-----------------
 dist/nvi/regex/regex2.h  |   16 ++--
 3 files changed, 96 insertions(+), 60 deletions(-)

diffs (truncated from 448 to 300 lines):

diff -r 776ab551e667 -r 3761db23df40 dist/nvi/regex/engine.c
--- a/dist/nvi/regex/engine.c   Sat Nov 19 17:40:19 2011 +0000
+++ b/dist/nvi/regex/engine.c   Sat Nov 19 17:45:11 2011 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: engine.c,v 1.6 2009/04/12 14:47:51 tnozaki Exp $ */
+/*     $NetBSD: engine.c,v 1.7 2011/11/19 17:45:11 tnozaki Exp $ */
 
 /*-
  * Copyright (c) 1992, 1993, 1994 Henry Spencer.
@@ -168,8 +168,8 @@
        /* prescreening; this does wonders for this rather slow code */
        if (g->must != NULL) {
                for (dp = start; dp < stop; dp++)
-                       if (*dp == g->must[0] && stop - dp >= g->mlen &&
-                               MEMCMP(dp, g->must, (size_t)g->mlen) == 0)
+                       if (*dp == g->must[0] && (size_t)(stop - dp) >= g->mlen &&
+                               MEMCMP(dp, g->must, g->mlen) == 0)
                                break;
                if (dp == stop)         /* we didn't find g->must */
                        return(REG_NOMATCH);
diff -r 776ab551e667 -r 3761db23df40 dist/nvi/regex/regcomp.c
--- a/dist/nvi/regex/regcomp.c  Sat Nov 19 17:40:19 2011 +0000
+++ b/dist/nvi/regex/regcomp.c  Sat Nov 19 17:45:11 2011 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: regcomp.c,v 1.6 2011/03/21 14:53:03 tnozaki Exp $ */
+/*     $NetBSD: regcomp.c,v 1.7 2011/11/19 17:45:11 tnozaki Exp $ */
 
 /*-
  * Copyright (c) 1992, 1993, 1994 Henry Spencer.
@@ -82,11 +82,11 @@
 #endif
 
 /* === regcomp.c === */
-static void p_ere __P((struct parse *p, int stop));
-static void p_ere_exp __P((struct parse *p));
+static void p_ere __P((struct parse *p, int stop, size_t reclimit));
+static void p_ere_exp __P((struct parse *p, size_t reclimit));
 static void p_str __P((struct parse *p));
-static void p_bre __P((struct parse *p, int end1, int end2));
-static int p_simp_re __P((struct parse *p, int starordinary));
+static void p_bre __P((struct parse *p, int end1, int end2, size_t reclimit));
+static int p_simp_re __P((struct parse *p, int starordinary, size_t reclimit));
 static int p_count __P((struct parse *p));
 static void p_bracket __P((struct parse *p));
 static void p_b_term __P((struct parse *p, cset *cs));
@@ -98,7 +98,7 @@
 static void bothcases __P((struct parse *p, int ch));
 static void ordinary __P((struct parse *p, int ch));
 static void nonnewline __P((struct parse *p));
-static void repeat __P((struct parse *p, sopno start, int from, int to));
+static void repeat __P((struct parse *p, sopno start, int from, int to, size_t reclimit));
 static int seterr __P((struct parse *p, int e));
 static cset *allocset __P((struct parse *p));
 static void freeset __P((struct parse *p, cset *cs));
@@ -122,7 +122,7 @@
 static void doemit __P((struct parse *p, sop op, size_t opnd));
 static void doinsert __P((struct parse *p, sop op, size_t opnd, sopno pos));
 static void dofwd __P((struct parse *p, sopno pos, sop value));
-static void enlarge __P((struct parse *p, sopno size));
+static int enlarge __P((struct parse *p, sopno size));
 static void stripsnug __P((struct parse *p, struct re_guts *g));
 static void findmust __P((struct parse *p, struct re_guts *g));
 static sopno pluscount __P((struct parse *p, struct re_guts *g));
@@ -170,6 +170,13 @@
 #define        never   0               /* some <assert.h>s have bugs too */
 #endif
 
+#define        MEMLIMIT        0x8000000
+#define MEMSIZE(p) \
+       ((p)->ncsalloc / CHAR_BIT * (p)->g->csetsize + \
+       (p)->ncsalloc * sizeof(cset) + \
+       (p)->ssize * sizeof(sop))
+#define        RECLIMIT        256
+
 /*
  - regcomp - interface for parser and compilation
  = extern int regcomp(regex_t *, const RCHAR_T *, int);
@@ -258,11 +265,11 @@
        EMIT(OEND, 0);
        g->firststate = THERE();
        if (cflags&REG_EXTENDED)
-               p_ere(p, OUT);
+               p_ere(p, OUT, 0);
        else if (cflags&REG_NOSPEC)
                p_str(p);
        else
-               p_bre(p, OUT, OUT);
+               p_bre(p, OUT, OUT, 0);
        EMIT(OEND, 0);
        g->laststate = THERE();
 
@@ -289,10 +296,10 @@
 
 /*
  - p_ere - ERE parser top level, concatenation and alternation
- == static void p_ere(register struct parse *p, int stop);
+ == static void p_ere(register struct parse *p, int stop, size_t reclimit);
  */
 static void
-p_ere(register struct parse *p, int stop)
+p_ere(register struct parse *p, int stop, size_t reclimit)
                          
                                /* character this ERE should end at */
 {
@@ -302,11 +309,16 @@
        register sopno conc;
        register int first = 1;         /* is this the first alternative? */
 
+       if (reclimit++ > RECLIMIT || p->error == REG_ESPACE) {
+               p->error = REG_ESPACE;
+               return;
+       }
+
        for (;;) {
                /* do a bunch of concatenated expressions */
                conc = HERE();
                while (MORE() && (c = PEEK()) != '|' && c != stop)
-                       p_ere_exp(p);
+                       p_ere_exp(p, reclimit);
                (void)REQUIRE(HERE() != conc, REG_EMPTY);       /* require nonempty */
 
                if (!EAT('|'))
@@ -338,7 +350,7 @@
  == static void p_ere_exp(register struct parse *p);
  */
 static void
-p_ere_exp(register struct parse *p)
+p_ere_exp(register struct parse *p, size_t reclimit)
 {
        register char c;
        register sopno pos;
@@ -360,7 +372,7 @@
                        p->pbegin[subno] = HERE();
                EMIT(OLPAREN, subno);
                if (!SEE(')'))
-                       p_ere(p, ')');
+                       p_ere(p, ')', reclimit);
                if (subno < NPAREN) {
                        p->pend[subno] = HERE();
                        assert(p->pend[subno] != 0);
@@ -462,7 +474,7 @@
                                count2 = INFINITY;
                } else          /* just a single number */
                        count2 = count;
-               repeat(p, pos, count, count2);
+               repeat(p, pos, count, count2, 0);
                if (!EAT('}')) {        /* error heuristics */
                        while (MORE() && PEEK() != '}')
                                NEXT();
@@ -496,7 +508,7 @@
 /*
  - p_bre - BRE parser top level, anchoring and concatenation
  == static void p_bre(register struct parse *p, register int end1, \
- ==    register int end2);
+ ==    register int end2, size_t reclimit);
  * Giving end1 as OUT essentially eliminates the end1/end2 check.
  *
  * This implementation is a bit of a kludge, in that a trailing $ is first
@@ -506,22 +518,29 @@
  * The amount of lookahead needed to avoid this kludge is excessive.
  */
 static void
-p_bre(register struct parse *p, register int end1, register int end2)
+p_bre(register struct parse *p, register int end1, register int end2, size_t reclimit)
                          
                                /* first terminating character */
                                /* second terminating character */
 {
-       register sopno start = HERE();
+       register sopno start;
        register int first = 1;                 /* first subexpression? */
        register int wasdollar = 0;
 
+       if (reclimit++ > RECLIMIT || p->error == REG_ESPACE) {
+               p->error = REG_ESPACE;
+               return;
+       }
+
+       start = HERE();
+
        if (EAT('^')) {
                EMIT(OBOL, 0);
                p->g->iflags |= USEBOL;
                p->g->nbol++;
        }
        while (MORE() && !SEETWO(end1, end2)) {
-               wasdollar = p_simp_re(p, first);
+               wasdollar = p_simp_re(p, first, reclimit);
                first = 0;
        }
        if (wasdollar) {        /* oops, that was a trailing anchor */
@@ -536,10 +555,10 @@
 
 /*
  - p_simp_re - parse a simple RE, an atom possibly followed by a repetition
- == static int p_simp_re(register struct parse *p, int starordinary);
+ == static int p_simp_re(register struct parse *p, int starordinary, size_t reclimit);
  */
 static int                     /* was the simple RE an unbackslashed $? */
-p_simp_re(register struct parse *p, int starordinary)
+p_simp_re(register struct parse *p, int starordinary, size_t reclimit)
                          
                                /* is a leading * an ordinary character? */
 {
@@ -571,7 +590,7 @@
                        EMIT(OLPAREN, subno);
                        /* the MORE here is an error heuristic */
                        if (MORE() && !SEETWO('\\', ')'))
-                               p_bre(p, '\\', ')');
+                               p_bre(p, '\\', ')', reclimit);
                        if (subno < NPAREN) {
                                p->pend[subno] = HERE();
                                assert(p->pend[subno] != 0);
@@ -646,7 +665,7 @@
                                count2 = INFINITY;
                } else          /* just a single number */
                        count2 = count;
-               repeat(p, pos, count, count2);
+               repeat(p, pos, count, count2, reclimit);
                if (!EATTWO('\\', '}')) {       /* error heuristics */
                        while (MORE() && !SEETWO('\\', '}'))
                                NEXT();
@@ -688,11 +707,15 @@
 static void
 p_bracket(register struct parse *p)
 {
-       register cset *cs = allocset(p);
+       register cset *cs;
        register int invert = 0;
        static RCHAR_T bow[] = { '[', ':', '<', ':', ']', ']' };
        static RCHAR_T eow[] = { '[', ':', '>', ':', ']', ']' };
 
+       cs = allocset(p);
+       if (cs == NULL)
+               return;
+
        /* Dept of Truly Sickening Special-Case Kludges */
        if (p->next + 5 < p->end && MEMCMP(p->next, bow, 6) == 0) {
                EMIT(OBOW, 0);
@@ -1005,25 +1028,29 @@
 
 /*
  - repeat - generate code for a bounded repetition, recursively if needed
- == static void repeat(register struct parse *p, sopno start, int from, int to);
+ == static void repeat(register struct parse *p, sopno start, int from, int to, size_t reclimit);
  */
 static void
-repeat(register struct parse *p, sopno start, int from, int to)
+repeat(register struct parse *p, sopno start, int from, int to, size_t reclimit)
                          
                                /* operand from here to end of strip */
                                /* repeated from this number */
                                        /* to this number of times (maybe INFINITY) */
 {
-       register sopno finish = HERE();
+       register sopno finish;
 #      define  N       2
 #      define  INF     3
 #      define  REP(f, t)       ((f)*8 + (t))
 #      define  MAP(n)  (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)
        register sopno copy;
 
-       if (p->error != 0)      /* head off possible runaway recursion */
+       if (reclimit++ > RECLIMIT) 
+               p->error = REG_ESPACE;
+       if (p->error)
                return;
 
+       finish = HERE();
+
        assert(from <= to);
 
        switch (REP(MAP(from), MAP(to))) {
@@ -1035,7 +1062,7 @@
        case REP(0, INF):               /* as x{1,}? */
                /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
                INSERT(OCH_, start);            /* offset is wrong... */
-               repeat(p, start+1, 1, to);
+               repeat(p, start+1, 1, to, reclimit);
                ASTERN(OOR1, start);
                AHEAD(start);                   /* ... fix it */
                EMIT(OOR2, 0);
@@ -1055,7 +1082,7 @@
                ASTERN(O_CH, THERETHERE());
                copy = dupl(p, start+1, finish+1);
                assert(copy == finish+4);
-               repeat(p, copy, 1, to-1);
+               repeat(p, copy, 1, to-1, reclimit);
                break;
        case REP(1, INF):               /* as x+ */
                INSERT(OPLUS_, start);
@@ -1063,11 +1090,11 @@
                break;
        case REP(N, N):                 /* as xx{m-1,n-1} */
                copy = dupl(p, start, finish);
-               repeat(p, copy, from-1, to-1);
+               repeat(p, copy, from-1, to-1, reclimit);
                break;
        case REP(N, INF):               /* as xx{n-1,INF} */
                copy = dupl(p, start, finish);
Prev by Date: [src/trunk]: src/share/misc One more.
Next by Date: [src/trunk]: src/share/misc Add a few.
Previous by Thread: [src/trunk]: src/share/misc One more.
Next by Thread: [src/trunk]: src/share/misc Add a few.
Indexes:
Home | Main Index | Thread Index | Old Index