Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/external/historical/nawk/dist PR/54424: Martijn Dekker: awk:...



details:   https://anonhg.NetBSD.org/src/rev/06b4b4513bfa
branches:  trunk
changeset: 1002457:06b4b4513bfa
user:      christos <christos%NetBSD.org@localhost>
date:      Thu Aug 01 06:22:52 2019 +0000

description:
PR/54424: Martijn Dekker: awk: broken character classes in UTF-8 locale:
only the first matches
Pick up some of the fixes from upstream:
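        - POSIX paren matching (an unmatched ')' is treated as a literal)
        - handle the \v and \a escape sequences
        - some more fatal error handling
        - iterate over the whole character range for character classes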

diffstat:

 external/historical/nawk/dist/b.c |  29 +++++++++++++++++++++++++++--
 1 files changed, 27 insertions(+), 2 deletions(-)

diffs (83 lines):

diff -r a014794531a4 -r 06b4b4513bfa external/historical/nawk/dist/b.c
--- a/external/historical/nawk/dist/b.c Thu Aug 01 06:14:45 2019 +0000
+++ b/external/historical/nawk/dist/b.c Thu Aug 01 06:22:52 2019 +0000
@@ -31,6 +31,7 @@
 #define        DEBUG
 
 #include <ctype.h>
+#include <limits.h>
 #include <stdio.h>
 #include <string.h>
 #include <stdlib.h>
@@ -333,6 +334,10 @@
                c = '\r';
        else if (c == 'b')
                c = '\b';
+       else if (c == 'v')
+               c = '\v';
+       else if (c == 'a')
+               c = '\a';
        else if (c == '\\')
                c = '\\';
        else if (c == 'x') {    /* hexadecimal goo follows */
@@ -978,6 +983,7 @@
        if (secondnum < 0) {    /* means {n,} -> repeat n-1 times followed by PLUS */
                if (firstnum < 2) {
                        /* 0 or 1: should be handled before you get here */
+                       FATAL("internal error");
                } else {
                        return replace_repeat(reptok, reptoklen, atom, atomlen,
                                firstnum, secondnum, REPEAT_PLUS_APPENDED);
@@ -998,6 +1004,7 @@
                return replace_repeat(reptok, reptoklen, atom, atomlen,
                                        firstnum, secondnum, REPEAT_WITH_Q);
        } else {        /* Error - shouldn't be here (n>m) */
+               FATAL("internal error");
        }
        return 0;
 }
@@ -1013,6 +1020,7 @@
        int i;
        int num, m, commafound, digitfound;
        const uschar *startreptok;
+       static int parens = 0;
 
 rescan:
        starttok = prestr;
@@ -1026,9 +1034,18 @@
        case '\0': prestr--; return '\0';
        case '^':
        case '$':
+               return c;
        case '(':
+               parens++;
+               return c;
        case ')':
-               return c;
+               if (parens) {
+                       parens--;
+                       return c;
+               }
+               /* unmatched close parenthesis; per POSIX, treat as literal */
+               rlxval = c;
+               return CHAR;
        case '\\':
                rlxval = quoted(&prestr);
                return CHAR;
@@ -1064,7 +1081,15 @@
                                if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' &&
                                    prestr[2 + cc->cc_namelen] == ']') {
                                        prestr += cc->cc_namelen + 3;
-                                       for (i = 1; i < NCHARS; i++) {
+                                       /*
+                                        * BUG: We begin at 1, instead of 0, since we
+                                        * would otherwise prematurely terminate the
+                                        * string for classes like [[:cntrl:]]. This
+                                        * means that we can't match the NUL character,
+                                        * not without first adapting the entire
+                                        * program to track each string's length.
+                                        */
+                                       for (i = 1; i <= UCHAR_MAX; i++) {
                                                if (!adjbuf(&buf, &bufsz, bp-buf+1, 100, &bp, "relex2"))
                                                    FATAL("out of space for reg expr %.10s...", lastre);
                                                if (cc->cc_func(i)) {



Home | Main Index | Thread Index | Old Index