Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/bin/sh Convert the pattern matcher from recursive to backtra...



details:   https://anonhg.NetBSD.org/src/rev/0368352711b5
branches:  trunk
changeset: 823572:0368352711b5
user:      christos <christos%NetBSD.org@localhost>
date:      Wed Apr 26 17:43:33 2017 +0000

description:
Convert the pattern matcher from recursive to backtracking (from FreeBSD).

diffstat:

 bin/sh/expand.c |  117 +++++++++++++++++++++++++++++++++----------------------
 bin/sh/expand.h |    3 +-
 2 files changed, 72 insertions(+), 48 deletions(-)

diffs (246 lines):

diff -r 9496c8588862 -r 0368352711b5 bin/sh/expand.c
--- a/bin/sh/expand.c   Wed Apr 26 14:56:54 2017 +0000
+++ b/bin/sh/expand.c   Wed Apr 26 17:43:33 2017 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: expand.c,v 1.104 2017/03/20 11:48:01 kre Exp $ */
+/*     $NetBSD: expand.c,v 1.105 2017/04/26 17:43:33 christos Exp $    */
 
 /*-
  * Copyright (c) 1991, 1993
@@ -37,7 +37,7 @@
 #if 0
 static char sccsid[] = "@(#)expand.c   8.5 (Berkeley) 5/15/95";
 #else
-__RCSID("$NetBSD: expand.c,v 1.104 2017/03/20 11:48:01 kre Exp $");
+__RCSID("$NetBSD: expand.c,v 1.105 2017/04/26 17:43:33 christos Exp $");
 #endif
 #endif /* not lint */
 
@@ -52,6 +52,7 @@
 #include <stdlib.h>
 #include <stdio.h>
 #include <wctype.h>
+#include <wchar.h>
 
 /*
  * Routines to expand arguments to commands.  We have to deal with
@@ -112,8 +113,9 @@
 STATIC void addfname(char *);
 STATIC struct strlist *expsort(struct strlist *);
 STATIC struct strlist *msort(struct strlist *, int);
-STATIC int pmatch(char *, char *, int);
+STATIC int patmatch(const char *, const char *, int);
 STATIC char *cvtnum(int, char *);
+static int collate_range_cmp(wchar_t, wchar_t);
 
 /*
  * Expand shell variables and backquotes inside a here document.
@@ -129,6 +131,18 @@
 }
 
 
+static int
+collate_range_cmp(wchar_t c1, wchar_t c2)
+{
+       wchar_t s1[2], s2[2];
+
+       s1[0] = c1;
+       s1[1] = L'\0';
+       s2[0] = c2;
+       s2[1] = L'\0';
+       return (wcscoll(s1, s2));
+}
+
 /*
  * Perform variable substitution and command substitution on an argument,
  * placing the resulting list of arguments in arglist.  If EXP_FULL is true,
@@ -1399,7 +1413,7 @@
  * pointer is stored into *end.
  */
 static int
-match_charclass(char *p, wchar_t chr, char **end)
+match_charclass(const char *p, wchar_t chr, const char **end)
 {
        char name[20];
        char *nameend;
@@ -1427,35 +1441,29 @@
  * Returns true if the pattern matches the string.
  */
 
-int
-patmatch(char *pattern, char *string, int squoted)
+STATIC int
+patmatch(const char *pattern, const char *string, int squoted)
 {
-#ifdef notdef
-       if (pattern[0] == '!' && pattern[1] == '!')
-               return 1 - pmatch(pattern + 2, string);
-       else
-#endif
-               return pmatch(pattern, string, squoted);
-}
-
-
-STATIC int
-pmatch(char *pattern, char *string, int squoted)
-{
-       char *p, *q, *end;
+       const char *p, *q, *end;
+       const char *bt_p, *bt_q;
        char c;
+       wchar_t wc, wc2;
 
        p = pattern;
        q = string;
+       bt_p = NULL;
+       bt_q = NULL;
        for (;;) {
                switch (c = *p++) {
                case '\0':
-                       goto breakloop;
+                       if (*q != '\0')
+                               goto backtrack;
+                       return 1;
                case CTLESC:
                        if (squoted && *q == CTLESC)
                                q++;
                        if (*q++ != *p++)
-                               return 0;
+                               goto backtrack;
                        break;
                case CTLQUOTEMARK:
                        continue;
@@ -1482,17 +1490,19 @@
                                        q++;
                                }
                        }
-                       do {
-                               if (pmatch(p, q, squoted))
-                                       return 1;
-                               if (squoted && *q == CTLESC)
-                                       q++;
-                       } while (*q++ != '\0');
-                       return 0;
+                       /*
+                        * First try the shortest match for the '*' that
+                        * could work. We can forget any earlier '*' since
+                        * there is no way having it match more characters
+                        * can help us, given that we are already here.
+                        */
+                       bt_p = p;
+                       bt_q = q;
+                       break;
                case '[': {
-                       char *endp;
+                       const char *savep, *saveq, *endp;
                        int invert, found;
-                       char chr;
+                       unsigned char chr;
 
                        endp = p;
                        if (*endp == '!')
@@ -1508,20 +1518,25 @@
                                        break;
                        }
                        invert = 0;
-                       if (*p == '!') {
+                       savep = p, saveq = q;
+                       invert = 0;
+                       if (*p == '!' || *p == '^') {
                                invert++;
                                p++;
                        }
                        found = 0;
-                       chr = *q++;
-                       if (squoted && chr == CTLESC)
-                               chr = *q++;
-                       if (chr == '\0')
+                       if (*q == '\0')
                                return 0;
+                       chr = (unsigned char)*q++;
                        c = *p++;
                        do {
                                if (c == CTLQUOTEMARK)
                                        continue;
+                               if (c == '\0') {
+                                       p = savep, q = saveq;
+                                       c = '[';
+                                       goto dft;
+                               }
                                if (c == '[' && *p == ':') {
                                        found |= match_charclass(p, chr, &end);
                                        if (end != NULL)
@@ -1529,36 +1544,46 @@
                                }
                                if (c == CTLESC)
                                        c = *p++;
+                               wc = (unsigned char)c;
                                if (*p == '-' && p[1] != ']') {
                                        p++;
-                                       while (*p == CTLQUOTEMARK)
-                                               p++;
                                        if (*p == CTLESC)
                                                p++;
-                                       if (chr >= c && chr <= *p)
+                                       wc2 = (unsigned char)*p++;
+                                       if (   collate_range_cmp(chr, wc) >= 0
+                                           && collate_range_cmp(chr, wc2) <= 0
+                                          )
                                                found = 1;
-                                       p++;
                                } else {
-                                       if (chr == c)
+                                       if (chr == wc)
                                                found = 1;
                                }
                        } while ((c = *p++) != ']');
                        if (found == invert)
-                               return 0;
+                               goto backtrack;
                        break;
                }
 dft:           default:
                        if (squoted && *q == CTLESC)
                                q++;
-                       if (*q++ != c)
+                       if (*q++ == c)
+                               break;
+backtrack:
+                       /*
+                        * If we have a mismatch (other than hitting the end
+                        * of the string), go back to the last '*' seen and
+                        * have it match one additional character.
+                        */
+                       if (bt_p == NULL)
                                return 0;
+                       if (*bt_q == '\0')
+                               return 0;
+                       bt_q++;
+                       p = bt_p;
+                       q = bt_q;
                        break;
                }
        }
-breakloop:
-       if (*q != '\0')
-               return 0;
-       return 1;
 }
 
 
diff -r 9496c8588862 -r 0368352711b5 bin/sh/expand.h
--- a/bin/sh/expand.h   Wed Apr 26 14:56:54 2017 +0000
+++ b/bin/sh/expand.h   Wed Apr 26 17:43:33 2017 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: expand.h,v 1.20 2017/03/20 11:26:07 kre Exp $  */
+/*     $NetBSD: expand.h,v 1.21 2017/04/26 17:43:33 christos Exp $     */
 
 /*-
  * Copyright (c) 1991, 1993
@@ -63,6 +63,5 @@
 void expandhere(union node *, int);
 void expandarg(union node *, struct arglist *, int);
 void expari(int);
-int patmatch(char *, char *, int);
 void rmescapes(char *);
 int casematch(union node *, char *);



Home | Main Index | Thread Index | Old Index