Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/usr.bin/sed Recognize \oOOO \dDD \xXX plus the other regular...



details:   https://anonhg.NetBSD.org/src/rev/93f7d9c5ab1e
branches:  trunk
changeset: 464422:93f7d9c5ab1e
user:      christos <christos%NetBSD.org@localhost>
date:      Sat Oct 05 20:23:55 2019 +0000

description:
Recognize \oOOO, \dDD and \xXX, plus the other regular 'C' backslash escapes,
like GNU sed does, except when inside regex []. (GNU sed translates those too,
unless --posix is specified.)

diffstat:

 usr.bin/sed/compile.c |  151 ++++++++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 146 insertions(+), 5 deletions(-)

diffs (208 lines):

diff -r 2edcf7eef57b -r 93f7d9c5ab1e usr.bin/sed/compile.c
--- a/usr.bin/sed/compile.c     Sat Oct 05 20:22:36 2019 +0000
+++ b/usr.bin/sed/compile.c     Sat Oct 05 20:23:55 2019 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: compile.c,v 1.47 2016/04/05 00:13:03 christos Exp $    */
+/*     $NetBSD: compile.c,v 1.48 2019/10/05 20:23:55 christos Exp $    */
 
 /*-
  * Copyright (c) 1992 Diomidis Spinellis.
@@ -38,7 +38,7 @@
 #endif
 
 #include <sys/cdefs.h>
-__RCSID("$NetBSD: compile.c,v 1.47 2016/04/05 00:13:03 christos Exp $");
+__RCSID("$NetBSD: compile.c,v 1.48 2019/10/05 20:23:55 christos Exp $");
 #ifdef __FBSDID
 __FBSDID("$FreeBSD: head/usr.bin/sed/compile.c 259132 2013-12-09 18:57:20Z eadler $");
 #endif
@@ -89,6 +89,7 @@
                 *findlabel(char *);
 static void      fixuplabel(struct s_command *, struct s_command *);
 static void      uselabel(void);
+static void      parse_escapes(char *);
 
 /*
  * Command specification.  This is used to drive the command parser.
@@ -463,6 +464,7 @@
        if (case_insensitive)
                flags |= REG_ICASE;
        rep = xmalloc(sizeof(regex_t));
+       parse_escapes(re);
        if ((eval = regcomp(rep, re, flags)) != 0)
                errx(1, "%lu: %s: RE error: %s",
                                linenum, fname, strregerror(eval, rep));
@@ -471,6 +473,134 @@
        return (rep);
 }
 
+static char
+cton(char c, int base)
+{
+       switch (c) {
+       case '0': case '1': case '2': case '3': case '4':
+       case '5': case '6': case '7':
+               return (char)(c - '0');
+       case '8': case '9':
+               return base == 8 ? '?' : (char)(c - '0');
+       case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+               return base == 16 ? (char)(c - 'a' + 10) : '?'; 
+       case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+               return base == 16 ? (char)(c - 'A' + 10) : '?'; 
+       default:
+               return '?';
+       }
+}
+
+static int
+ston(char **pp, char *sp, int base)
+{
+       char *p = *pp, n;
+       int r = cton(p[1], base);
+
+       if (r == '?')
+               return 0;
+
+       p++;
+       while ((n = cton(p[1], base)) != '?' && r < 255) {
+               r = r * base + n;
+               p++;
+       }
+       *sp = (char)r;
+       *pp = p;
+       return 1;
+}
+               
+static int
+unescape(char **pp, char **spp)
+{
+       char *p = *pp;
+       char *sp = *spp;
+
+       switch (*p) {
+       case 'o':
+               if (!ston(&p, sp, 8))
+                       return 0;
+               break;
+       case 'd':
+               if (!ston(&p, sp, 10))
+                       return 0;
+               break;
+       case 'x':
+               if (!ston(&p, sp, 16))
+                       return 0;
+               break;
+       case 'a':
+               *sp = '\a';
+               p++;
+               break;
+#if 0
+       // No, \b RE
+       case 'b':
+               *sp = '\b';
+               break;
+#endif
+       case 'f':
+               *sp = '\f';
+               break;
+       case 'n':
+               *sp = '\n';
+               break;
+       case 'r':
+               *sp = '\r';
+               break;
+       case 'v':
+               *sp = '\v';
+               break;
+       default:
+               return 0;
+       }
+       *spp = sp + 1;
+       *pp = p;
+       return 1;
+}
+
+static void
+parse_escapes(char *buf)
+{
+       char bracket = '\0';
+       char *p, *q;
+
+       p = q = buf;
+
+       for (p = q = buf; *p; p++) {
+               if (*p == '\\' && p[1] && !bracket) {
+                       p++;
+                       if (unescape(&p, &q))
+                               continue;
+                       *q++ = '\\';
+               }
+               switch (*p) {
+               case '[':
+                       if (!bracket)
+                               bracket = *p;
+                       break;
+               case '.':
+               case ':':
+               case '=':
+                       if (bracket == '[' && p[-1] == '[')
+                               bracket = *p;
+                       break;
+               case ']':
+                       if (!bracket)
+                           break;
+                       if (bracket == '[')
+                               bracket = '\0';
+                       else if (p[-2] != bracket && p[-1] == bracket)
+                               bracket = '[';
+                       break;
+               default:
+                       break;
+               }
+               *q++ = *p;
+       }
+       *q = '\0';
+}
+
 /*
  * Compile the substitution string of a regular expression and set res to
  * point to a saved copy of it.  Nsub is the number of parenthesized regular
@@ -508,7 +638,8 @@
                                else
                                        p++;
 
-                               if (*p == '\0') {
+                               switch (*p) {
+                               case '\0':
                                        /*
                                         * This escaped character is continued
                                         * in the next part of the line.  Note
@@ -519,7 +650,9 @@
                                        sawesc = 1;
                                        p--;
                                        continue;
-                               } else if (strchr("123456789", *p) != NULL) {
+                               case '0': case '1': case '2': case '3':
+                               case '4': case '5': case '6': case '7':
+                               case '8': case '9':
                                        *sp++ = '\\';
                                        ref = (u_char)(*p - '0');
                                        if (s->re != NULL &&
@@ -528,8 +661,16 @@
                                                                linenum, fname, *p);
                                        if (s->maxbref < ref)
                                                s->maxbref = ref;
-                               } else if (*p == '&' || *p == '\\')
+                                       break;
+                               case '&':
+                               case '\\':
                                        *sp++ = '\\';
+                                       break;
+                               default:
+                                       if (unescape(&p, &sp))
+                                               continue;
+                                       break;
+                               }
                        } else if (*p == c) {
                                if (*++p == '\0' && more) {
                                        if (cu_fgets(lbuf, sizeof(lbuf), &more))



Home | Main Index | Thread Index | Old Index