Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/bin/sh First pass at fixing some of the more arcane pattern ...



details:   https://anonhg.NetBSD.org/src/rev/ae6c7da7cf1f
branches:  trunk
changeset: 320806:ae6c7da7cf1f
user:      kre <kre%NetBSD.org@localhost>
date:      Fri Jul 20 22:47:26 2018 +0000

description:
First pass at fixing some of the more arcane pattern matching
possibilities that we do not currently handle all that well.

This mostly means (for now) making sure that quoted pattern
magic characters (as well as quoted sh syntax magic chars)
are properly marked, so they remain known as being quoted,
and do not turn into pattern magic.   Also, make sure that an
unquoted \ in a pattern always quotes whatever comes next
(which, unlike in regular expressions, includes inside []
matches).

diffstat:

 bin/sh/expand.c |  36 ++++++++++++++++++++++++++----------
 bin/sh/parser.c |   9 +++++----
 bin/sh/syntax.c |  10 ++++++----
 3 files changed, 37 insertions(+), 18 deletions(-)

diffs (185 lines):

diff -r 5009aa0c3f50 -r ae6c7da7cf1f bin/sh/expand.c
--- a/bin/sh/expand.c   Fri Jul 20 20:50:34 2018 +0000
+++ b/bin/sh/expand.c   Fri Jul 20 22:47:26 2018 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: expand.c,v 1.123 2018/06/22 18:19:41 kre Exp $ */
+/*     $NetBSD: expand.c,v 1.124 2018/07/20 22:47:26 kre Exp $ */
 
 /*-
  * Copyright (c) 1991, 1993
@@ -37,7 +37,7 @@
 #if 0
 static char sccsid[] = "@(#)expand.c   8.5 (Berkeley) 5/15/95";
 #else
-__RCSID("$NetBSD: expand.c,v 1.123 2018/06/22 18:19:41 kre Exp $");
+__RCSID("$NetBSD: expand.c,v 1.124 2018/07/20 22:47:26 kre Exp $");
 #endif
 #endif /* not lint */
 
@@ -1109,7 +1109,7 @@
        int num;
        char *p;
        int i;
-       char sep;
+       int sep;
        char **ap;
        char const *syntax;
 
@@ -1167,10 +1167,14 @@
                        STRTODEST(p);
                        if (!*ap)
                                break;
-                       if (sep)
+                       if (sep) {
+                               if (quoted && (flag & (EXP_GLOB|EXP_CASE)) &&
+                                   (SQSYNTAX[sep] == CCTL || SQSYNTAX[sep] == CSBACK))
+                                       STPUTC(CTLESC, expdest);
                                STPUTC(sep, expdest);
-                       else if ((flag & (EXP_SPLIT|EXP_IN_QUOTES)) == EXP_SPLIT
-                           && !quoted && **ap != '\0')
+                       } else
+                           if ((flag & (EXP_SPLIT|EXP_IN_QUOTES)) == EXP_SPLIT
+                             && !quoted && **ap != '\0')
                                STPUTC('\0', expdest);
                }
                return;
@@ -1749,19 +1753,31 @@
                        int invert, found;
                        unsigned char chr;
 
+                       /*
+                        * First quick check to see if there is a
+                        * possible matching ']' - if not, then this
+                        * is not a char class, and the '[' is just
+                        * a literal '['.
+                        *
+                        * This check will not detect all non classes, but
+                        * that's OK - It just means that we execute the
+                        * harder code sometimes when it cannot succeed.
+                        */
                        endp = p;
-                       if (*endp == '!')
+                       if (*endp == '!' || *endp == '^')
                                endp++;
                        for (;;) {
                                while (*endp == CTLQUOTEMARK || *endp==CTLNONL)
                                        endp++;
                                if (*endp == '\0')
-                                       goto dft;               /* no matching ] */
+                                       goto dft;       /* no matching ] */
                                if (*endp == CTLESC)
                                        endp++;
                                if (*++endp == ']')
                                        break;
                        }
+                       /* end shortcut */
+
                        invert = 0;
                        savep = p, saveq = q;
                        invert = 0;
@@ -1789,12 +1805,12 @@
                                                continue;
                                        }
                                }
-                               if (c == CTLESC)
+                               if (c == CTLESC || c == '\\')
                                        c = *p++;
                                wc = (unsigned char)c;
                                if (*p == '-' && p[1] != ']') {
                                        p++;
-                                       if (*p == CTLESC)
+                                       if (*p == CTLESC || *p == '\\')
                                                p++;
                                        wc2 = (unsigned char)*p++;
                                        if (   collate_range_cmp(chr, wc) >= 0
diff -r 5009aa0c3f50 -r ae6c7da7cf1f bin/sh/parser.c
--- a/bin/sh/parser.c   Fri Jul 20 20:50:34 2018 +0000
+++ b/bin/sh/parser.c   Fri Jul 20 22:47:26 2018 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: parser.c,v 1.147 2018/07/13 22:43:44 kre Exp $ */
+/*     $NetBSD: parser.c,v 1.148 2018/07/20 22:47:26 kre Exp $ */
 
 /*-
  * Copyright (c) 1991, 1993
@@ -37,7 +37,7 @@
 #if 0
 static char sccsid[] = "@(#)parser.c   8.7 (Berkeley) 5/16/95";
 #else
-__RCSID("$NetBSD: parser.c,v 1.147 2018/07/13 22:43:44 kre Exp $");
+__RCSID("$NetBSD: parser.c,v 1.148 2018/07/20 22:47:26 kre Exp $");
 #endif
 #endif /* not lint */
 
@@ -1770,7 +1770,7 @@
        for (c = firstc ;; c = pgetc_macro()) { /* until of token */
                if (syntax == ARISYNTAX)
                        out = insert_elided_nl(out);
-               CHECKSTRSPACE(4, out);  /* permit 4 calls to USTPUTC */
+               CHECKSTRSPACE(6, out);  /* permit 6 calls to USTPUTC */
                switch (syntax[c]) {
                case CNL:       /* '\n' */
                        if (syntax == BASESYNTAX && varnest == 0)
@@ -1788,6 +1788,7 @@
                                out = readcstyleesc(out);
                                continue;
                        }
+                       USTPUTC(CTLESC, out);
                        /* FALLTHROUGH */
                case CWORD:
                        USTPUTC(c, out);
@@ -1818,7 +1819,7 @@
                        if (ISDBLQUOTE() && c != '\\' && c != '`' &&
                            c != '$' && (c != '"' || magicq))
                                USTPUTC('\\', out);
-                       if (SQSYNTAX[c] == CCTL)
+                       if (SQSYNTAX[c] == CCTL || SQSYNTAX[c] == CSBACK)
                                USTPUTC(CTLESC, out);
                        else if (!magicq) {
                                USTPUTC(CTLQUOTEMARK, out);
diff -r 5009aa0c3f50 -r ae6c7da7cf1f bin/sh/syntax.c
--- a/bin/sh/syntax.c   Fri Jul 20 20:50:34 2018 +0000
+++ b/bin/sh/syntax.c   Fri Jul 20 22:47:26 2018 +0000
@@ -1,7 +1,7 @@
-/*     $NetBSD: syntax.c,v 1.5 2017/08/21 13:20:49 kre Exp $   */
+/*     $NetBSD: syntax.c,v 1.6 2018/07/20 22:47:26 kre Exp $   */
 
 #include <sys/cdefs.h>
-__RCSID("$NetBSD: syntax.c,v 1.5 2017/08/21 13:20:49 kre Exp $");
+__RCSID("$NetBSD: syntax.c,v 1.6 2018/07/20 22:47:26 kre Exp $");
 
 #include <limits.h>
 #include "shell.h"
@@ -46,7 +46,7 @@
     set('`', CBQUOTE)
     set('$', CVAR)
     set('}', CENDVAR)
-    /* ':/' for tilde expansion, '-' for [a\-x] pattern ranges */
+    /* ':/' for tilde expansion, '-]' for [a\-x] pattern ranges */
     set('!', CCTL)
     set('*', CCTL)
     set('?', CCTL)
@@ -56,6 +56,7 @@
     set(':', CCTL)
     set('/', CCTL)
     set('-', CCTL)
+    set(']', CCTL)
 };
 
 /* syntax table used when in single quotes */
@@ -64,7 +65,7 @@
     set('\n', CNL)
     set('\'', CSQUOTE)
     set('\\', CSBACK)
-    /* ':/' for tilde expansion, '-' for [a\-x] pattern ranges */
+    /* ':/' for tilde expansion, '-]' for [a\-x] pattern ranges */
     set('!', CCTL)
     set('*', CCTL)
     set('?', CCTL)
@@ -74,6 +75,7 @@
     set(':', CCTL)
     set('/', CCTL)
     set('-', CCTL)
+    set(']', CCTL)
 };
 
 /* syntax table used when in arithmetic */



Home | Main Index | Thread Index | Old Index