Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src/external/historical/nawk/dist PR/54424: Martijn Dekker: awk:...
details: https://anonhg.NetBSD.org/src/rev/06b4b4513bfa
branches: trunk
changeset: 1002457:06b4b4513bfa
user: christos <christos%NetBSD.org@localhost>
date: Thu Aug 01 06:22:52 2019 +0000
description:
PR/54424: Martijn Dekker: awk: broken character classes in UTF-8 locale:
only the first matches
Pick up some of the fixes from upstream:
- POSIX parenthesis matching: an unmatched ')' is treated as a literal
- recognize the \v and \a escape sequences
- call FATAL() on internal-error paths that should be unreachable
- cover the entire character range (up to UCHAR_MAX) when expanding
  character classes.
diffstat:
external/historical/nawk/dist/b.c | 29 +++++++++++++++++++++++++++--
1 files changed, 27 insertions(+), 2 deletions(-)
diffs (83 lines):
diff -r a014794531a4 -r 06b4b4513bfa external/historical/nawk/dist/b.c
--- a/external/historical/nawk/dist/b.c Thu Aug 01 06:14:45 2019 +0000
+++ b/external/historical/nawk/dist/b.c Thu Aug 01 06:22:52 2019 +0000
@@ -31,6 +31,7 @@
#define DEBUG
#include <ctype.h>
+#include <limits.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
@@ -333,6 +334,10 @@
c = '\r';
else if (c == 'b')
c = '\b';
+ else if (c == 'v')
+ c = '\v';
+ else if (c == 'a')
+ c = '\a';
else if (c == '\\')
c = '\\';
else if (c == 'x') { /* hexadecimal goo follows */
@@ -978,6 +983,7 @@
if (secondnum < 0) { /* means {n,} -> repeat n-1 times followed by PLUS */
if (firstnum < 2) {
/* 0 or 1: should be handled before you get here */
+ FATAL("internal error");
} else {
return replace_repeat(reptok, reptoklen, atom, atomlen,
firstnum, secondnum, REPEAT_PLUS_APPENDED);
@@ -998,6 +1004,7 @@
return replace_repeat(reptok, reptoklen, atom, atomlen,
firstnum, secondnum, REPEAT_WITH_Q);
} else { /* Error - shouldn't be here (n>m) */
+ FATAL("internal error");
}
return 0;
}
@@ -1013,6 +1020,7 @@
int i;
int num, m, commafound, digitfound;
const uschar *startreptok;
+ static int parens = 0;
rescan:
starttok = prestr;
@@ -1026,9 +1034,18 @@
case '\0': prestr--; return '\0';
case '^':
case '$':
+ return c;
case '(':
+ parens++;
+ return c;
case ')':
- return c;
+ if (parens) {
+ parens--;
+ return c;
+ }
+ /* unmatched close parenthesis; per POSIX, treat as literal */
+ rlxval = c;
+ return CHAR;
case '\\':
rlxval = quoted(&prestr);
return CHAR;
@@ -1064,7 +1081,15 @@
if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' &&
prestr[2 + cc->cc_namelen] == ']') {
prestr += cc->cc_namelen + 3;
- for (i = 1; i < NCHARS; i++) {
+ /*
+ * BUG: We begin at 1, instead of 0, since we
+ * would otherwise prematurely terminate the
+ * string for classes like [[:cntrl:]]. This
+ * means that we can't match the NUL character,
+ * not without first adapting the entire
+ * program to track each string's length.
+ */
+ for (i = 1; i <= UCHAR_MAX; i++) {
if (!adjbuf(&buf, &bufsz, bp-buf+1, 100, &bp, "relex2"))
FATAL("out of space for reg expr %.10s...", lastre);
if (cc->cc_func(i)) {
Home |
Main Index |
Thread Index |
Old Index