Subject: Re: [dM] make manpage mis-describes $(:S///g)
To: None <gnats-bugs@NetBSD.ORG>
From: der Mouse <mouse@Athena.McRCIM.McGill.EDU>
List: netbsd-bugs
Date: 09/13/1996 13:09:19
In PR 2748, I wrote
However, I would much prefer to completely replace the :S code,
since it has other problems; most notably, it does not support
regexps, [...]
I hope to get this code written soon; if and when I do, I'll
send it in as an addendum to this PR, [...]
Herewith said code. I tweaked buf.[ch] because I write code that's
properly const-poisoned (and I use -Wcast-qual -Wwrite-strings to
ensure this), which made me notice that Buf_AddBytes() was missing that
const. The rest of it is pretty straightforward. I actually didn't
touch the S modifier because too many Makefiles probably depend on it
already. Instead, I used C ("change") for the regex version.
With these changes, I see the following:
% cat /tmp/Makefile
VAR = x-foo-bar-foo-bar-x y-foo-two-bar-foo-two-bar-y
MANPAGES = cat1/dig.0 cat3/resolver.0
.PHONY: foo
foo:
@echo VAR = $(VAR)
@echo :S/foo/X/ = $(VAR:S/foo/X/)
@echo :S/foo/X/g = $(VAR:S/foo/X/g)
@echo :S/two/X/ = $(VAR:S/two/X/)
@echo :S/two/X/g = $(VAR:S/two/X/g)
@echo :C/foo/X/ = $(VAR:C/foo/X/)
@echo :C/foo/X/g = $(VAR:C/foo/X/g)
@echo :C/foo/X/1 = $(VAR:C/foo/X/1)
@echo :C/foo/X/1g = $(VAR:C/foo/X/1g)
@echo MANPAGES = $(MANPAGES)
@echo ':C=^cat(.)/(.*)[.]0$$=man\1/\2.\1=' = $(MANPAGES:C=^cat(.)/(.*)[.]0$=man\1/\2.\1=)
% make -f /tmp/Makefile
VAR = x-foo-bar-foo-bar-x y-foo-two-bar-foo-two-bar-y
:S/foo/X/ = x-X-bar-foo-bar-x y-foo-two-bar-foo-two-bar-y
:S/foo/X/g = x-X-bar-X-bar-x y-X-two-bar-X-two-bar-y
:S/two/X/ = x-foo-bar-foo-bar-x y-foo-X-bar-foo-two-bar-y
:S/two/X/g = x-foo-bar-foo-bar-x y-foo-X-bar-foo-X-bar-y
:C/foo/X/ = x-X-bar-foo-bar-x y-X-two-bar-foo-two-bar-y
:C/foo/X/g = x-X-bar-X-bar-x y-X-two-bar-X-two-bar-y
:C/foo/X/1 = x-X-bar-foo-bar-x y-foo-two-bar-foo-two-bar-y
:C/foo/X/1g = x-X-bar-X-bar-x y-foo-two-bar-foo-two-bar-y
MANPAGES = cat1/dig.0 cat3/resolver.0
:C=^cat(.)/(.*)[.]0$=man\1/\2.\1= = man1/dig.1 man3/resolver.3
%
Here are the changes themselves.
--- OLD/usr.bin/make/buf.c Thu Jan 1 00:00:00 1970
+++ NEW/usr.bin/make/buf.c Thu Jan 1 00:00:00 1970
@@ -130,7 +130,7 @@
Buf_AddBytes (bp, numBytes, bytesPtr)
register Buffer bp;
int numBytes;
- Byte *bytesPtr;
+ const Byte *bytesPtr;
{
BufExpand (bp, numBytes);
--- OLD/usr.bin/make/buf.h Thu Jan 1 00:00:00 1970
+++ NEW/usr.bin/make/buf.h Thu Jan 1 00:00:00 1970
@@ -68,7 +68,7 @@
#define BUF_ERROR 256
void Buf_OvAddByte __P((Buffer, int));
-void Buf_AddBytes __P((Buffer, int, Byte *));
+void Buf_AddBytes __P((Buffer, int, const Byte *));
void Buf_UngetByte __P((Buffer, int));
void Buf_UngetBytes __P((Buffer, int, Byte *));
int Buf_GetByte __P((Buffer));
--- OLD/usr.bin/make/make.1 Thu Jan 1 00:00:00 1970
+++ NEW/usr.bin/make/make.1 Thu Jan 1 00:00:00 1970
@@ -452,27 +452,27 @@
.It Cm R
Replaces each word in the variable with everything but its suffix.
.Sm off
-.It Cm S No \&/ Ar old_pattern Xo
-.No \&/ Ar new_pattern
+.It Cm S No \&/ Ar old_string Xo
+.No \&/ Ar new_string
.No \&/ Op Cm g
.Xc
.Sm on
Modify the first occurrence of
-.Ar old_pattern
-in each word to be replaced with
-.Ar new_pattern .
+.Ar old_string
+in the variable's value, replacing it with
+.Ar new_string .
If a
.Ql g
is appended to the last slash of the pattern, all occurrences
in each word are replaced.
If
-.Ar old_pattern
-begins with a carat
+.Ar old_string
+begins with a caret
.Pq Ql ^ ,
-.Ar old_pattern
+.Ar old_string
is anchored at the beginning of each word.
If
-.Ar old_pattern
+.Ar old_string
ends with a dollar sign
.Pq Ql \&$ ,
it is anchored at the end of each word.
@@ -481,7 +481,11 @@
an ampersand
.Pq Ql &
is replaced by
-.Ar old_pattern .
+.Ar old_string
+(without any
+.Ql ^
+or
+.Ql \&$ ) .
Any character may be used as a delimiter for the parts of the modifier
string.
The anchoring, ampersand and delimiter characters may be escaped with a
@@ -494,8 +498,36 @@
.Ar new_string
with the single exception that a backslash is used to prevent the expansion
of a dollar sign
-.Pq Ql \&$
+.Pq Ql \&$ ,
not a preceding dollar sign as is usual.
+.Sm off
+.It Cm C No \&/ Ar pattern Xo
+.No \&/ Ar replacement
+.No \&/ Op Cm 1g
+.Xc
+.Sm on
+The
+.Cm C
+modifier is just like the
+.Cm S
+modifier except that the old and new strings, instead of being
+simple strings, are a regular expression (see
+.Xr regex 3 )
+and an
+.Xr ed 1 Ns \-style
+replacement string. Normally, the first occurrence of the pattern in
+each word of the value is changed. The
+.Ql 1
+modifier causes the substitution to apply to at most one word; the
+.Ql g
+modifier causes the substitution to apply to as many instances of the
+search pattern as occur in the word or words it is found in. Note that
+.Ql 1
+and
+.Ql g
+are orthogonal; the former specifies whether multiple words are
+potentially affected, the latter whether multiple substitutions can
+potentially occur within each affected word.
.It Cm T
Replaces each word in the variable with its last component.
.It Ar old_string=new_string
--- OLD/usr.bin/make/var.c Thu Jan 1 00:00:00 1970
+++ NEW/usr.bin/make/var.c Thu Jan 1 00:00:00 1970
@@ -89,6 +89,7 @@
*/
#include <ctype.h>
+#include <regex.h>
#include "make.h"
#include "buf.h"
@@ -156,6 +157,16 @@
#define VAR_NO_SUB 8 /* Substitution is non-global and already done */
} VarPattern;
+typedef struct { /* state shared across the per-word calls that apply one :C (regex) modifier */
+ regex_t re; /* pattern compiled with regcomp(); matched against each word with regexec() */
+ int nsub; /* number of entries in matches[]: re_nsub + 1, clamped to 1..10 by the modifier parser */
+ regmatch_t *matches; /* malloc'ed array of nsub match slots filled in by regexec(); [0] is the whole match */
+ char *replace; /* replacement text; & and \digit in it refer to matches[] entries */
+ unsigned int global : 1; /* 'g' flag: keep substituting within the rest of a word after a match */
+ unsigned int oneword : 1; /* '1' flag: once one word has matched, leave later words untouched */
+ unsigned int matched : 1; /* set by VarRegexpSub when a word matches; cleared before VarModify is called */
+ } VarREPattern;
+
static int VarCmp __P((ClientData, ClientData));
static Var *VarFind __P((char *, GNode *, int));
static void VarAdd __P((char *, char *, GNode *));
@@ -1039,6 +1050,128 @@
return(TRUE);
}
+
+/*-
+ *-----------------------------------------------------------------------
+ * VarRegexpSub --
+ *	Perform a regex substitution on the given word, placing the
+ *	result in the passed buffer.
+ *
+ * Input:
+ *	word		the word to modify
+ *	addSpace	TRUE if a space must be emitted before the first
+ *			character this call adds to buf
+ *	buf		buffer receiving the (possibly modified) word
+ *	patternp	the VarREPattern describing the substitution
+ *
+ * Results:
+ *	TRUE if a space is needed before more characters are added.
+ *
+ * Side Effects:
+ *	Sets pat->matched when the word matches, overwrites
+ *	pat->matches[], and reports problems through Error().
+ *
+ *-----------------------------------------------------------------------
+ */
+static Boolean VarRegexpSub(char *word, Boolean addSpace, Buffer buf, ClientData patternp)
+{
+    VarREPattern *pat;
+    int xrv;
+    char *wp;
+    char *rp;
+    int added;
+    int flags;
+
+#define MAYBE_ADD_SPACE() do { if (addSpace && !added) { Buf_AddByte(buf,' '); } added = 1; } while (0)
+    added = 0;
+    flags = 0;
+    wp = word;
+    pat = patternp;
+    if (pat->oneword && pat->matched) {
+        /* '1' flag and an earlier word already matched: copy this one as is. */
+        xrv = REG_NOMATCH;
+    } else {
+tryagain:
+        xrv = regexec(&pat->re, wp, pat->nsub, pat->matches, flags);
+    }
+    switch (xrv) {
+    case 0:
+        pat->matched = 1;
+        /* Copy the unmatched text that precedes the match. */
+        if (pat->matches[0].rm_so > 0) {
+            MAYBE_ADD_SPACE();
+            Buf_AddBytes(buf, pat->matches[0].rm_so, wp);
+        }
+        /* Expand the replacement: \& and \\ are literals, & and \N are
+           references to the whole match and to subexpression N. */
+        for (rp = pat->replace; *rp; rp++) {
+            if ((*rp == '\\') && ((rp[1] == '&') || (rp[1] == '\\'))) {
+                MAYBE_ADD_SPACE();
+                Buf_AddByte(buf, rp[1]);
+                rp++;
+            } else if ((*rp == '&') ||
+                       ((*rp == '\\') && isdigit((unsigned char)rp[1]))) {
+                int n;
+                const char *subbuf;
+                int sublen;
+                char errstr[3];
+
+                if (*rp == '&') {
+                    n = 0;
+                    errstr[0] = '&';
+                    errstr[1] = '\0';
+                } else {
+                    n = rp[1] - '0';
+                    errstr[0] = '\\';
+                    errstr[1] = rp[1];
+                    errstr[2] = '\0';
+                    rp++;
+                }
+                /* matches[] has exactly nsub entries, so the valid
+                   indices are 0..nsub-1; n == nsub is out of range. */
+                if (n >= pat->nsub) {
+                    Error("%s in replacement but no such subexpression in expression", &errstr[0]);
+                    subbuf = "";
+                    sublen = 0;
+                } else if ((pat->matches[n].rm_so == -1) && (pat->matches[n].rm_eo == -1)) {
+                    Error("%s in replacement but that subexpression wasn't matched", &errstr[0]);
+                    subbuf = "";
+                    sublen = 0;
+                } else {
+                    subbuf = wp + pat->matches[n].rm_so;
+                    sublen = pat->matches[n].rm_eo - pat->matches[n].rm_so;
+                }
+                if (sublen > 0) {
+                    MAYBE_ADD_SPACE();
+                    Buf_AddBytes(buf, sublen, subbuf);
+                }
+            } else {
+                MAYBE_ADD_SPACE();
+                Buf_AddByte(buf, *rp);
+            }
+        }
+        wp += pat->matches[0].rm_eo;
+        if (pat->global) {
+            /* The rest of the word no longer starts at the beginning of
+               the string, so ^ must not match on later attempts. */
+            flags |= REG_NOTBOL;
+            /* An empty match at the current position consumed nothing;
+               copy one character through so the retry makes progress
+               instead of matching the same empty string forever. */
+            if ((pat->matches[0].rm_so == 0) && (pat->matches[0].rm_eo == 0) && *wp) {
+                MAYBE_ADD_SPACE();
+                Buf_AddByte(buf, *wp);
+                wp++;
+            }
+            if (*wp)
+                goto tryagain;
+        }
+        /* Copy whatever follows the (last) match. */
+        if (*wp) {
+            MAYBE_ADD_SPACE();
+            Buf_AddBytes(buf, strlen(wp), wp);
+        }
+        break;
+    default:
+        {
+            char *errbuf;
+            size_t errlen;
+
+            /* First call sizes the message, second call fetches it. */
+            errlen = regerror(xrv, &pat->re, 0, 0);
+            errbuf = malloc(errlen);
+            if (errbuf) {
+                regerror(xrv, &pat->re, errbuf, errlen);
+                Error("unexpected regex error: %s", errbuf);
+                free(errbuf);
+            } else {
+                Error("unexpected regex error: %s", "(out of memory)");
+            }
+        }
+        /* fall through */
+    case REG_NOMATCH:
+        /* No (further) match: pass the remaining text through unchanged. */
+        if (*wp) {
+            MAYBE_ADD_SPACE();
+            Buf_AddBytes(buf, strlen(wp), wp);
+        }
+        break;
+    }
+    return(addSpace||added);
+#undef MAYBE_ADD_SPACE
+}
+
+
/*-
*-----------------------------------------------------------------------
* VarModify --
@@ -1600,6 +1733,128 @@
free(pattern.lhs);
free(pattern.rhs);
break;
+ }
+ case 'C':
+ { VarREPattern pat;
+ char *re;
+ int junk;
+ char delim;
+ Buffer buf;
+ int err;
+ delim = tstr[1];
+ tstr += 2;
+ buf = Buf_Init(0);
+ /* Skim through until the matching delimiter is found;
+ pick up variable substitutions on the way. Also
+ allow backslashes to quote the delimiter, $, and \,
+ but don't touch other backslashes. */
+ for (cp=tstr;*cp&&(*cp!=delim);cp++)
+ { if ( (*cp == '\\') &&
+ ( (cp[1] == delim) ||
+ (cp[1] == '$') ||
+ (cp[1] == '\\') ) )
+ { Buf_AddByte(buf,(Byte)cp[1]);
+ cp ++;
+ }
+ else if ((*cp == '$') && (cp[1] != delim))
+ { char *cp2;
+ int len;
+ Boolean freeIt;
+ cp2 = Var_Parse(cp,ctxt,err,&len,&freeIt);
+ Buf_AddBytes(buf,strlen(cp2),(Byte *)cp2);
+ if (freeIt) free(cp2);
+ cp += len - 1;
+ }
+ else
+ { Buf_AddByte(buf,(Byte)*cp);
+ }
+ }
+ Buf_AddByte(buf,(Byte)'\0');
+ if (*cp != delim)
+ { *lengthPtr = cp - start + 1;
+ if (*freePtr) free(str);
+ Buf_Destroy(buf,TRUE);
+ Error("Unclosed substitution for %s (%c missing)",v->name,delim);
+ return(var_Error);
+ }
+ re = (char *) Buf_GetAll(buf,&junk);
+ Buf_Destroy(buf,FALSE);
+ /* Now we've got the match expression; pick up the replacement.
+ Once again, do variable expansion and handle some backslashes. */
+ buf = Buf_Init(0);
+ tstr = cp + 1;
+ for (cp=tstr;*cp&&(*cp!=delim);cp++)
+ { if ( (*cp == '\\') &&
+ ( (cp[1] == delim) ||
+ (cp[1] == '\\') ||
+ (cp[1] == '$') ) )
+ { Buf_AddByte(buf,(Byte)cp[1]);
+ cp ++;
+ }
+ else if ((*cp == '$') && (cp[1] != delim))
+ { char *cp2;
+ int len;
+ Boolean freeIt;
+ cp2 = Var_Parse(cp,ctxt,err,&len,&freeIt);
+ Buf_AddBytes(buf,strlen(cp2),(Byte *)cp2);
+ cp += len - 1;
+ if (freeIt) free(cp2);
+ }
+ else
+ { Buf_AddByte(buf,(Byte)*cp);
+ }
+ }
+ Buf_AddByte(buf,(Byte)'\0');
+ if (*cp != delim)
+ { *lengthPtr = cp - start + 1;
+ free(re);
+ if (*freePtr) free(str);
+ Buf_Destroy(buf,TRUE);
+ Error("Unclosed substitution for %s (%c missing)",v->name,delim);
+ return(var_Error);
+ }
+ pat.replace = (char *) Buf_GetAll(buf,&junk);
+ Buf_Destroy(buf,FALSE);
+ cp ++;
+ pat.global = 0;
+ pat.oneword = 0;
+ for (;;cp++)
+ { switch (*cp)
+ { case 'g':
+ pat.global = 1;
+ continue;
+ break;
+ case '1':
+ pat.oneword = 1;
+ continue;
+ break;
+ }
+ break;
+ }
+ termc = *cp;
+ err = regcomp(&pat.re,re,REG_EXTENDED);
+ if (err)
+ { char *errbuf;
+ int errlen;
+ errlen = regerror(err,&pat.re,0,0);
+ errbuf = malloc(errlen);
+ regerror(err,&pat.re,errbuf,errlen);
+ Error("RE substitution error: %s",errbuf);
+ free(errbuf);
+ free(re);
+ free(pat.replace);
+ return(var_Error);
+ }
+ free(re);
+ pat.nsub = pat.re.re_nsub + 1;
+ if (pat.nsub < 1) pat.nsub = 1;
+ if (pat.nsub > 10) pat.nsub = 10;
+ pat.matches = malloc(pat.nsub*sizeof(regmatch_t));
+ pat.matched = 0;
+ newStr = VarModify(str,VarRegexpSub,(ClientData)&pat);
+ regfree(&pat.re);
+ free(pat.replace);
+ break;
}
case 'T':
if (tstr[1] == endc || tstr[1] == ':') {
der Mouse
mouse@collatz.mcrcim.mcgill.edu
01 EE 31 F6 BB 0C 34 36 00 F3 7C 5A C1 A0 67 1D