Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/bin/sh Part 2 of pattern matching (glob etc) fixes.



details:   https://anonhg.NetBSD.org/src/rev/bbb37805b98b
branches:  trunk
changeset: 363355:bbb37805b98b
user:      kre <kre%NetBSD.org@localhost>
date:      Sun Jul 22 23:07:48 2018 +0000

description:
Part 2 of pattern matching (glob etc) fixes.

Attempt to correctly deal with \ (both when it is a literal,
in appropriate cases, and when it appears as CTLESC when it was
detected as a quoting character during parsing).

In a pattern, in sh, no quoted character can ever be anything other
than a literal character.   This is quite different than regular
expressions, and even different than other uses of glob matching,
where shell quoting is not an issue.

In something like

        ls ?\*.c

the ? is a meta-character, the * is a literal (it was quoted).  This
is nothing new, sh has handled that properly for ever.

But the same happens with
        VAR='?\*.c'
and
        ls $VAR

which has not always been handled correctly.   Of course, in

        ls "$VAR"

nothing in VAR is a meta-character (the entire expansion is quoted)
so even the '\' must match literally (or more accurately, no matching
happens - VAR simply contains an "unusual" filename).  But if it had
been

        ls *"$VAR"

then we would be looking for filenames that end with the literal 5
characters that make up $VAR.

The same kinds of things are required of matching patterns in case
statements, and sub-strings with the % and # operators in variable
expansions.

While here, the final remnant of the ancient !! pattern matching
hack has been removed (the code that actually implemented it was
long gone, but one small piece remained, not doing any real harm,
but potentially wasting time - if someone gave a pattern which would
once have invoked that hack.)

diffstat:

 bin/sh/expand.c |  79 +++++++++++++++++++++++++++++++++++++++++++++++++-------
 bin/sh/parser.c |   8 +++--
 2 files changed, 74 insertions(+), 13 deletions(-)

diffs (174 lines):

diff -r c6c70f79c651 -r bbb37805b98b bin/sh/expand.c
--- a/bin/sh/expand.c   Sun Jul 22 21:16:58 2018 +0000
+++ b/bin/sh/expand.c   Sun Jul 22 23:07:48 2018 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: expand.c,v 1.126 2018/07/22 21:16:58 kre Exp $ */
+/*     $NetBSD: expand.c,v 1.127 2018/07/22 23:07:48 kre Exp $ */
 
 /*-
  * Copyright (c) 1991, 1993
@@ -37,7 +37,7 @@
 #if 0
 static char sccsid[] = "@(#)expand.c   8.5 (Berkeley) 5/15/95";
 #else
-__RCSID("$NetBSD: expand.c,v 1.126 2018/07/22 21:16:58 kre Exp $");
+__RCSID("$NetBSD: expand.c,v 1.127 2018/07/22 23:07:48 kre Exp $");
 #endif
 #endif /* not lint */
 
@@ -927,7 +927,9 @@
                                        varlen++;
                        } else {
                                while (*val) {
-                                       if (quotes && syntax[(int)*val] == CCTL)
+                                       if (quotes && (varflags & VSQUOTE) &&
+                                           (syntax[(int)*val] == CCTL ||
+                                            syntax[(int)*val] == CBACK))
                                                STPUTC(CTLESC, expdest);
                                        STPUTC(*val++, expdest);
                                }
@@ -1465,22 +1467,59 @@
                        metaflag = 1;
                else if (*p == '[') {
                        q = p + 1;
-                       if (*q == '!')
+                       if (*q == '!' || *q == '^')
                                q++;
                        for (;;) {
                                while (*q == CTLQUOTEMARK || *q == CTLNONL)
                                        q++;
-                               if (*q == CTLESC)
+                               if (*q == ']') {
                                        q++;
-                               if (*q == '/' || *q == '\0')
-                                       break;
-                               if (*++q == ']') {
                                        metaflag = 1;
                                        break;
                                }
+                               if (*q == '[' && q[1] == ':') {
+                                       /*
+                                        * character class, look for :] ending
+                                        * also stop on ']' (end bracket expr)
+                                        * or '\0' or '/' (end pattern)
+                                        */
+                                       while (*++q != '\0' && *q != ']' &&
+                                           *q != '/') {
+                                               if (*q == CTLESC) {
+                                                       if (*++q == '\0')
+                                                               break;
+                                                       if (*q == '/')
+                                                               break;
+                                               } else if (*q == ':' &&
+                                                   q[1] == ']')
+                                                       break;
+                                       }
+                                       if (*q == ':') {
+                                               /*
+                                                * stopped at ':]'
+                                                * still in [...]
+                                                * skip ":]" and continue;
+                                                */
+                                               q += 2;
+                                               continue;
+                                       }
+
+                                       /* done at end of pattern, not [...] */
+                                       if (*q == '\0' || *q == '/')
+                                               break;
+
+                                       /* found the ']', we have a [...] */
+                                       metaflag = 1;
+                                       q++;    /* skip ']' */
+                                       break;
+                               }
+                               if (*q == CTLESC)
+                                       q++;
+                               /* end of pattern cannot be escaped */
+                               if (*q == '/' || *q == '\0')
+                                       break;
+                               q++;
                        }
-               } else if (*p == '!' && p[1] == '!'     && (p == name || p[-1] == '/')) {
-                       metaflag = 1;
                } else if (*p == '\0')
                        break;
                else if (*p == CTLQUOTEMARK || *p == CTLNONL)
@@ -1707,6 +1746,10 @@
        for (;;) {
                switch (c = *p++) {
                case '\0':
+                       if (squoted && *q == CTLESC) {
+                               if (q[1] == '\0')
+                                       q++;
+                       }
                        if (*q != '\0')
                                goto backtrack;
                        VTRACE(DBG_MATCH, ("match\n"));
@@ -1714,6 +1757,16 @@
                case CTLESC:
                        if (squoted && *q == CTLESC)
                                q++;
+                       if (*p == '\0' && *q == '\0') {
+                               VTRACE(DBG_MATCH, ("match-\\\n"));
+                               return 1;
+                       }
+                       if (*q++ != *p++)
+                               goto backtrack;
+                       break;
+               case '\\':
+                       if (squoted && *q == CTLESC)
+                               q++;
                        if (*q++ != *p++)
                                goto backtrack;
                        break;
@@ -1747,6 +1800,10 @@
                                        q++;
                                }
                        }
+                       if (c == CTLESC && p[1] == '\0') {
+                               VTRACE(DBG_MATCH, ("match+\\\n"));
+                               return 1;
+                       }
                        /*
                         * First try the shortest match for the '*' that
                         * could work. We can forget any earlier '*' since
@@ -1798,6 +1855,8 @@
                                VTRACE(DBG_MATCH, ("[]fail\n"));
                                return 0;
                        }
+                       if (squoted && *q == CTLESC)
+                               q++;
                        chr = (unsigned char)*q++;
                        c = *p++;
                        do {
diff -r c6c70f79c651 -r bbb37805b98b bin/sh/parser.c
--- a/bin/sh/parser.c   Sun Jul 22 21:16:58 2018 +0000
+++ b/bin/sh/parser.c   Sun Jul 22 23:07:48 2018 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: parser.c,v 1.148 2018/07/20 22:47:26 kre Exp $ */
+/*     $NetBSD: parser.c,v 1.149 2018/07/22 23:07:48 kre Exp $ */
 
 /*-
  * Copyright (c) 1991, 1993
@@ -37,7 +37,7 @@
 #if 0
 static char sccsid[] = "@(#)parser.c   8.7 (Berkeley) 5/16/95";
 #else
-__RCSID("$NetBSD: parser.c,v 1.148 2018/07/20 22:47:26 kre Exp $");
+__RCSID("$NetBSD: parser.c,v 1.149 2018/07/22 23:07:48 kre Exp $");
 #endif
 #endif /* not lint */
 
@@ -1817,8 +1817,10 @@
                        }
                        quotef = 1;     /* current token is quoted */
                        if (ISDBLQUOTE() && c != '\\' && c != '`' &&
-                           c != '$' && (c != '"' || magicq))
+                           c != '$' && (c != '"' || magicq)) {
+                               USTPUTC(CTLESC, out);
                                USTPUTC('\\', out);
+                       }
                        if (SQSYNTAX[c] == CCTL || SQSYNTAX[c] == CSBACK)
                                USTPUTC(CTLESC, out);
                        else if (!magicq) {



Home | Main Index | Thread Index | Old Index