Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src/bin/sh Convert the pattern matcher from recursive to backtra...
details: https://anonhg.NetBSD.org/src/rev/0368352711b5
branches: trunk
changeset: 823572:0368352711b5
user: christos <christos%NetBSD.org@localhost>
date: Wed Apr 26 17:43:33 2017 +0000
description:
Convert the pattern matcher from recursive to backtracking (from FreeBSD).
diffstat:
bin/sh/expand.c | 117 +++++++++++++++++++++++++++++++++----------------------
bin/sh/expand.h | 3 +-
2 files changed, 72 insertions(+), 48 deletions(-)
diffs (246 lines):
diff -r 9496c8588862 -r 0368352711b5 bin/sh/expand.c
--- a/bin/sh/expand.c Wed Apr 26 14:56:54 2017 +0000
+++ b/bin/sh/expand.c Wed Apr 26 17:43:33 2017 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: expand.c,v 1.104 2017/03/20 11:48:01 kre Exp $ */
+/* $NetBSD: expand.c,v 1.105 2017/04/26 17:43:33 christos Exp $ */
/*-
* Copyright (c) 1991, 1993
@@ -37,7 +37,7 @@
#if 0
static char sccsid[] = "@(#)expand.c 8.5 (Berkeley) 5/15/95";
#else
-__RCSID("$NetBSD: expand.c,v 1.104 2017/03/20 11:48:01 kre Exp $");
+__RCSID("$NetBSD: expand.c,v 1.105 2017/04/26 17:43:33 christos Exp $");
#endif
#endif /* not lint */
@@ -52,6 +52,7 @@
#include <stdlib.h>
#include <stdio.h>
#include <wctype.h>
+#include <wchar.h>
/*
* Routines to expand arguments to commands. We have to deal with
@@ -112,8 +113,9 @@
STATIC void addfname(char *);
STATIC struct strlist *expsort(struct strlist *);
STATIC struct strlist *msort(struct strlist *, int);
-STATIC int pmatch(char *, char *, int);
+STATIC int patmatch(const char *, const char *, int);
STATIC char *cvtnum(int, char *);
+static int collate_range_cmp(wchar_t, wchar_t);
/*
* Expand shell variables and backquotes inside a here document.
@@ -129,6 +131,18 @@
}
+static int
+collate_range_cmp(wchar_t c1, wchar_t c2)
+{
+ wchar_t s1[2], s2[2];
+
+ s1[0] = c1;
+ s1[1] = L'\0';
+ s2[0] = c2;
+ s2[1] = L'\0';
+ return (wcscoll(s1, s2));
+}
+
/*
* Perform variable substitution and command substitution on an argument,
* placing the resulting list of arguments in arglist. If EXP_FULL is true,
@@ -1399,7 +1413,7 @@
* pointer is stored into *end.
*/
static int
-match_charclass(char *p, wchar_t chr, char **end)
+match_charclass(const char *p, wchar_t chr, const char **end)
{
char name[20];
char *nameend;
@@ -1427,35 +1441,29 @@
* Returns true if the pattern matches the string.
*/
-int
-patmatch(char *pattern, char *string, int squoted)
+STATIC int
+patmatch(const char *pattern, const char *string, int squoted)
{
-#ifdef notdef
- if (pattern[0] == '!' && pattern[1] == '!')
- return 1 - pmatch(pattern + 2, string);
- else
-#endif
- return pmatch(pattern, string, squoted);
-}
-
-
-STATIC int
-pmatch(char *pattern, char *string, int squoted)
-{
- char *p, *q, *end;
+ const char *p, *q, *end;
+ const char *bt_p, *bt_q;
char c;
+ wchar_t wc, wc2;
p = pattern;
q = string;
+ bt_p = NULL;
+ bt_q = NULL;
for (;;) {
switch (c = *p++) {
case '\0':
- goto breakloop;
+ if (*q != '\0')
+ goto backtrack;
+ return 1;
case CTLESC:
if (squoted && *q == CTLESC)
q++;
if (*q++ != *p++)
- return 0;
+ goto backtrack;
break;
case CTLQUOTEMARK:
continue;
@@ -1482,17 +1490,19 @@
q++;
}
}
- do {
- if (pmatch(p, q, squoted))
- return 1;
- if (squoted && *q == CTLESC)
- q++;
- } while (*q++ != '\0');
- return 0;
+ /*
+ * First try the shortest match for the '*' that
+ * could work. We can forget any earlier '*' since
+ * there is no way having it match more characters
+ * can help us, given that we are already here.
+ */
+ bt_p = p;
+ bt_q = q;
+ break;
case '[': {
- char *endp;
+ const char *savep, *saveq, *endp;
int invert, found;
- char chr;
+ unsigned char chr;
endp = p;
if (*endp == '!')
@@ -1508,20 +1518,25 @@
break;
}
invert = 0;
- if (*p == '!') {
+ savep = p, saveq = q;
+ invert = 0;
+ if (*p == '!' || *p == '^') {
invert++;
p++;
}
found = 0;
- chr = *q++;
- if (squoted && chr == CTLESC)
- chr = *q++;
- if (chr == '\0')
+ if (*q == '\0')
return 0;
+ chr = (unsigned char)*q++;
c = *p++;
do {
if (c == CTLQUOTEMARK)
continue;
+ if (c == '\0') {
+ p = savep, q = saveq;
+ c = '[';
+ goto dft;
+ }
if (c == '[' && *p == ':') {
found |= match_charclass(p, chr, &end);
if (end != NULL)
@@ -1529,36 +1544,46 @@
}
if (c == CTLESC)
c = *p++;
+ wc = (unsigned char)c;
if (*p == '-' && p[1] != ']') {
p++;
- while (*p == CTLQUOTEMARK)
- p++;
if (*p == CTLESC)
p++;
- if (chr >= c && chr <= *p)
+ wc2 = (unsigned char)*p++;
+ if ( collate_range_cmp(chr, wc) >= 0
+ && collate_range_cmp(chr, wc2) <= 0
+ )
found = 1;
- p++;
} else {
- if (chr == c)
+ if (chr == wc)
found = 1;
}
} while ((c = *p++) != ']');
if (found == invert)
- return 0;
+ goto backtrack;
break;
}
dft: default:
if (squoted && *q == CTLESC)
q++;
- if (*q++ != c)
+ if (*q++ == c)
+ break;
+backtrack:
+ /*
+ * If we have a mismatch (other than hitting the end
+ * of the string), go back to the last '*' seen and
+ * have it match one additional character.
+ */
+ if (bt_p == NULL)
return 0;
+ if (*bt_q == '\0')
+ return 0;
+ bt_q++;
+ p = bt_p;
+ q = bt_q;
break;
}
}
-breakloop:
- if (*q != '\0')
- return 0;
- return 1;
}
diff -r 9496c8588862 -r 0368352711b5 bin/sh/expand.h
--- a/bin/sh/expand.h Wed Apr 26 14:56:54 2017 +0000
+++ b/bin/sh/expand.h Wed Apr 26 17:43:33 2017 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: expand.h,v 1.20 2017/03/20 11:26:07 kre Exp $ */
+/* $NetBSD: expand.h,v 1.21 2017/04/26 17:43:33 christos Exp $ */
/*-
* Copyright (c) 1991, 1993
@@ -63,6 +63,5 @@
void expandhere(union node *, int);
void expandarg(union node *, struct arglist *, int);
void expari(int);
-int patmatch(char *, char *, int);
void rmescapes(char *);
int casematch(union node *, char *);
Home |
Main Index |
Thread Index |
Old Index