Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src/external/historical/nawk/dist PR/46155: Miguel Piñeiro Jr: F...
details: https://anonhg.NetBSD.org/src/rev/dfe7a68b087c
branches: trunk
changeset: 777939:dfe7a68b087c
user: christos <christos%NetBSD.org@localhost>
date: Sat Mar 10 19:18:48 2012 +0000
description:
PR/46155: Miguel Piñeiro Jr: Fix RS processing. Apply the gawk-like patch
from the excellent PR. Many thanks for all the work you put on this,
explanation, tests, and patch!
diffstat:
external/historical/nawk/dist/b.c | 90 +++++++++++++++++++++++++++++++++++
external/historical/nawk/dist/lib.c | 78 ++++++++++++------------------
external/historical/nawk/dist/proto.h | 5 +-
external/historical/nawk/dist/run.c | 25 +++++----
4 files changed, 138 insertions(+), 60 deletions(-)
diffs (truncated from 342 to 300 lines):
diff -r 7112af47bebc -r dfe7a68b087c external/historical/nawk/dist/b.c
--- a/external/historical/nawk/dist/b.c Sat Mar 10 19:11:36 2012 +0000
+++ b/external/historical/nawk/dist/b.c Sat Mar 10 19:18:48 2012 +0000
@@ -624,6 +624,96 @@
return (0);
}
+
+/*
+ * NAME
+ * fnematch - search stream f for text matching the automaton pfa
+ *
+ * DESCRIPTION
+ * A stream-fed version of nematch which transfers characters to a
+ * null-terminated buffer. All characters up to and including the last
+ * character of the matching text or EOF are placed in the buffer. If
+ * a match is found, patbeg and patlen are set appropriately.
+ * *pbuf and *pbufsize are rewritten before returning; quantum goes to adjbuf().
+ * RETURN VALUES
+ * 0 No match found.
+ * 1 Match found.
+ */
+
+int fnematch(fa *pfa, FILE *f, uschar **pbuf, int *pbufsize, int quantum)
+{
+ uschar *buf = *pbuf;
+ int bufsize = *pbufsize;
+ int c, i, j, k, ns, s;
+
+ s = pfa->initstat;
+ assert(s < pfa->state_count);
+ patlen = 0;
+
+ /*
+ * All indices relative to buf.
+ * i <= j <= k <= bufsize
+ * (i starts at -1 so that the first outer pass begins at index 0)
+ * i: origin of active substring
+ * j: current character
+ * k: destination of next getc()
+ */
+ i = -1, k = 0;
+ do {
+ j = i++;
+ do {
+ if (++j == k) {
+ if (k == bufsize)
+ if (!adjbuf(&buf, &bufsize, bufsize+1, quantum, 0, "fnematch"))
+ FATAL("stream '%.30s...' too long", buf);
+ buf[k++] = (c = getc(f)) != EOF ? c : 0; /* EOF is stored as a null byte */
+ }
+ c = buf[j];
+ /* assert(c < NCHARS); */
+
+ if ((ns = pfa->gototab[s][c]) != 0)
+ s = ns;
+ else
+ s = cgoto(pfa, s, c);
+ assert(s < pfa->state_count);
+
+ if (pfa->out[s]) { /* final state: remember the longest match from origin i so far */
+ patlen = j - i + 1;
+ if (c == 0) /* don't count $: c is the null byte that marks end of input */
+ patlen--;
+ }
+ } while (buf[j] && s != 1); /* stop at the end-of-input null byte or when s == 1 */
+ s = 2; /* NOTE(review): restart state for the next origin; presumably the unanchored start state - confirm */
+ } while (buf[i] && !patlen);
+
+ /* adjbuf() may have relocated a resized buffer. Inform the world. */
+ *pbuf = buf;
+ *pbufsize = bufsize;
+
+ if (patlen) {
+ patbeg = buf + i;
+ /*
+ * Under no circumstances is the last character fed to
+ * the automaton part of the match. It is EOF's nullbyte,
+ * or it sent the automaton into a state with no further
+ * transitions available (s==1), or both. Room for a
+ * terminating nullbyte is guaranteed.
+ * ungetc any chars after the end of matching text
+ * (except for EOF's nullbyte, if present) and null
+ * terminate the buffer. NOTE(review): this may push back
+ * several chars; ISO C guarantees only one ungetc() slot.
+ */
+ do
+ if (buf[--k] && ungetc(buf[k], f) == EOF)
+ FATAL("unable to ungetc '%c'", buf[k]);
+ while (k > i + patlen);
+ buf[k] = 0;
+ return 1;
+ }
+ else
+ return 0;
+}
+
Node *reparse(const char *p) /* parses regular expression pointed to by p */
{ /* uses relex() to scan regular expression */
Node *np;
diff -r 7112af47bebc -r dfe7a68b087c external/historical/nawk/dist/lib.c
--- a/external/historical/nawk/dist/lib.c Sat Mar 10 19:11:36 2012 +0000
+++ b/external/historical/nawk/dist/lib.c Sat Mar 10 19:18:48 2012 +0000
@@ -38,6 +38,7 @@
char EMPTY[] = { '\0' };
FILE *infile = NULL;
+int innew; /* 1 = infile has not been read by readrec */
char *file = EMPTY;
uschar *record;
int recsize = RECSIZE;
@@ -104,6 +105,7 @@
argno++;
}
infile = stdin; /* no filenames, so use stdin */
+ innew = 1;
}
static int firsttime = 1;
@@ -146,9 +148,12 @@
infile = stdin;
else if ((infile = fopen(file, "r")) == NULL)
FATAL("can't open file %s", file);
+ innew = 1;
setfval(fnrloc, 0.0);
}
- c = readrec(&buf, &bufsize, infile);
+ c = readrec(&buf, &bufsize, infile, innew);
+ if (innew)
+ innew = 0;
if (c != 0 || buf[0] != '\0') { /* normal record */
if (isrecord) {
if (freeable(fldtab[0]))
@@ -186,9 +191,9 @@
argno++;
}
-int readrec(uschar **pbuf, int *pbufsize, FILE *inf) /* read one record into buf */
+int readrec(uschar **pbuf, int *pbufsize, FILE *inf, int newflag) /* read one record into buf */
{
- int sep, c;
+ int sep, c, isrec, found, tempstat;
uschar *rr, *buf = *pbuf;
int bufsize = *pbufsize;
size_t len;
@@ -202,48 +207,26 @@
FATAL("field separator %.10s... is too long", *FS);
memcpy(inputFS, *FS, len_inputFS);
}
- if ((sep = **RS) == 0) {
- sep = '\n';
- while ((c=getc(inf)) == '\n' && c != EOF) /* skip leading \n's */
- ;
- if (c != EOF)
- ungetc(c, inf);
- } else if ((*RS)[1]) {
+ if (**RS && (*RS)[1]) {
fa *pfa = makedfa(*RS, 1);
- int tempstat = pfa->initstat;
- char *brr = buf;
- char *rrr = NULL;
- int x;
- for (rr = buf; ; ) {
- while ((c = getc(inf)) != EOF) {
- if (rr-buf+3 > bufsize)
- if (!adjbuf(&buf, &bufsize, 3+rr-buf,
- recsize, &rr, "readrec 2"))
- FATAL("input record `%.30s...'"
- " too long", buf);
- *rr++ = c;
- *rr = '\0';
- if (!(x = nematch(pfa, brr))) {
- pfa->initstat = tempstat;
- if (rrr) {
- rr = rrr;
- ungetc(c, inf);
- break;
- }
- } else {
- pfa->initstat = 2;
- brr = rrr = rr = patbeg;
- }
- }
- if (rrr || c == EOF)
- break;
- if ((c = getc(inf)) == '\n' || c == EOF)
- /* 2 in a row */
- break;
- *rr++ = '\n';
- *rr++ = c;
+ if (newflag)
+ found = fnematch(pfa, inf, &buf, &bufsize, recsize);
+ else {
+ tempstat = pfa->initstat;
+ pfa->initstat = 2;
+ found = fnematch(pfa, inf, &buf, &bufsize, recsize);
+ pfa->initstat = tempstat;
}
+ if (found)
+ *patbeg = 0;
} else {
+ if ((sep = **RS) == 0) {
+ sep = '\n';
+ while ((c=getc(inf)) == '\n' && c != EOF) /* skip leading \n's */
+ ;
+ if (c != EOF)
+ ungetc(c, inf);
+ }
for (rr = buf; ; ) {
for (; (c=getc(inf)) != sep && c != EOF; ) {
if (rr-buf+1 > bufsize)
@@ -264,14 +247,15 @@
*rr++ = '\n';
*rr++ = c;
}
+ if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readrec 3"))
+ FATAL("input record `%.30s...' too long", buf);
+ *rr = 0;
}
- if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readrec 3"))
- FATAL("input record `%.30s...' too long", buf);
- *rr = 0;
- dprintf( ("readrec saw <%s>, returns %d\n", buf, c == EOF && rr == buf ? 0 : 1) );
*pbuf = buf;
*pbufsize = bufsize;
- return c == EOF && rr == buf ? 0 : 1;
+ isrec = *buf || !feof(inf);
+ dprintf( ("readrec saw <%s>, returns %d\n", buf, isrec) );
+ return isrec;
}
char *getargv(int n) /* get ARGV[n] */
diff -r 7112af47bebc -r dfe7a68b087c external/historical/nawk/dist/proto.h
--- a/external/historical/nawk/dist/proto.h Sat Mar 10 19:11:36 2012 +0000
+++ b/external/historical/nawk/dist/proto.h Sat Mar 10 19:18:48 2012 +0000
@@ -54,6 +54,7 @@
extern int match(fa *, const char *);
extern int pmatch(fa *, const char *);
extern int nematch(fa *, const char *);
+extern int fnematch(fa *, FILE *, uschar **, int *, int);
extern Node *reparse(const char *);
extern Node *regexp(void);
extern Node *primary(void);
@@ -122,7 +123,7 @@
extern void growfldtab(int n);
extern int getrec(uschar **, int *, int);
extern void nextfile(void);
-extern int readrec(uschar **buf, int *bufsize, FILE *inf);
+extern int readrec(uschar **buf, int *bufsize, FILE *inf, int newflag);
extern char *getargv(int);
extern void setclvar(char *);
extern void fldbld(void);
@@ -191,7 +192,7 @@
extern Cell *printstat(Node **, int);
extern Cell *nullproc(Node **, int);
extern FILE *redirect(int, Node *);
-extern FILE *openfile(int, const char *);
+extern FILE *openfile(int, const char *, int *);
extern const char *filename(FILE *);
extern Cell *closefile(Node **, int);
extern void closeall(void);
diff -r 7112af47bebc -r dfe7a68b087c external/historical/nawk/dist/run.c
--- a/external/historical/nawk/dist/run.c Sat Mar 10 19:11:36 2012 +0000
+++ b/external/historical/nawk/dist/run.c Sat Mar 10 19:18:48 2012 +0000
@@ -406,7 +406,7 @@
FILE *fp;
uschar *buf;
int bufsize = recsize;
- int mode;
+ int mode, newflag;
if ((buf = malloc(bufsize)) == NULL)
FATAL("out of memory in getline");
@@ -418,12 +418,12 @@
mode = ptoi(a[1]);
if (mode == '|') /* input pipe */
mode = LE; /* arbitrary flag */
- fp = openfile(mode, getsval(x));
+ fp = openfile(mode, getsval(x), &newflag);
tempfree(x);
if (fp == NULL)
n = -1;
else
- n = readrec(&buf, &bufsize, fp);
+ n = readrec(&buf, &bufsize, fp, newflag);
if (n <= 0) {
;
} else if (a[0] != NULL) { /* getline var <file */
@@ -1623,7 +1623,7 @@
if (isrec(x) || strlen(getsval(x)) == 0) {
flush_all(); /* fflush() or fflush("") -> all */
u = 0;
- } else if ((fp = openfile(FFLUSH, getsval(x))) == NULL)
+ } else if ((fp = openfile(FFLUSH, getsval(x), NULL)) == NULL)
u = -1;
else
u = fflush(fp);
@@ -1715,7 +1715,7 @@
x = execute(b);
Home |
Main Index |
Thread Index |
Old Index