Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[pkgsrc/trunk]: pkgsrc mk/subst.mk: fix edge case in detection of identity substitutions
details: https://anonhg.NetBSD.org/pkgsrc/rev/210d470e6f04
branches: trunk
changeset: 431305:210d470e6f04
user: rillig <rillig%pkgsrc.org@localhost>
date: Mon May 11 19:52:13 2020 +0000
description:
mk/subst.mk: fix edge case in detection of identity substitutions
In a basic regular expression, a dollar-sign only means end-of-string if
it appears at the end of the pattern, or (at the choice of the
implementation) at the end of a \(...\) subexpression.
This affects the package converters/help2man, which uses a regular
expression containing a dollar-sign in a non-final position. That regular
expression had not been detected as an identity substitution even though
it is one.
diffstat:
mk/scripts/subst-identity.awk | 10 +++++++---
regress/infra-unittests/subst.sh | 28 ++++++++++++++++++++++++----
2 files changed, 31 insertions(+), 7 deletions(-)
diffs (81 lines):
diff -r 1f8d85e4426c -r 210d470e6f04 mk/scripts/subst-identity.awk
--- a/mk/scripts/subst-identity.awk Mon May 11 19:45:54 2020 +0000
+++ b/mk/scripts/subst-identity.awk Mon May 11 19:52:13 2020 +0000
@@ -1,5 +1,5 @@
#! /usr/bin/awk -f
-# $NetBSD: subst-identity.awk,v 1.2 2020/05/06 06:14:56 rillig Exp $
+# $NetBSD: subst-identity.awk,v 1.3 2020/05/11 19:52:14 rillig Exp $
#
# Tests whether a sed(1) command line consists of only identity substitutions
# like s,id,id,.
@@ -9,13 +9,17 @@
# Returns the first character of the given regular expression,
# if it is a single-character regular expression.
-function identity_char(s) {
+function identity_char(s, sep, i) {
if (s ~ /^[\t -~]/ && s !~ /^[$&*.\[\\\]^]/)
return substr(s, 1, 1);
if (s ~ /^\\[$*.\[\]^]/)
return substr(s, 2, 1) "x";
if (s ~ /^\[[$*.]\]/)
return substr(s, 2, 1) "xx";
+ if (substr(s, 1, 1) == "$" && substr(s, 2, 1) != sep)
+ return substr(s, 1, 1);
+ if (substr(s, 1, 1) == "^" && i > 3)
+ return substr(s, 1, 1);
return "";
}
@@ -29,7 +33,7 @@
i = 3;
pat_to = "";
while (i < len && substr(s, i, 1) != sep) {
- ch = identity_char(substr(s, i));
+ ch = identity_char(substr(s, i), sep, i);
if (ch == "")
break;
pat_to = pat_to substr(ch, 1, 1);
diff -r 1f8d85e4426c -r 210d470e6f04 regress/infra-unittests/subst.sh
--- a/regress/infra-unittests/subst.sh Mon May 11 19:45:54 2020 +0000
+++ b/regress/infra-unittests/subst.sh Mon May 11 19:52:13 2020 +0000
@@ -1,5 +1,5 @@
#! /bin/sh
-# $NetBSD: subst.sh,v 1.35 2020/05/11 19:17:22 rillig Exp $
+# $NetBSD: subst.sh,v 1.36 2020/05/11 19:52:13 rillig Exp $
#
# Tests for mk/subst.mk.
#
@@ -1219,9 +1219,29 @@
# See converters/help2man for an example.
assert_identity 'yes' -e 's,\$(var),$(var),'
- # An unescaped dollar means end-of-line and cannot be part of an
- # identity substitution. This may happen, but is clearly a typo.
- assert_identity 'no' -e 's,$(var),$(var),'
+ # POSIX 2004 and 2018 both define in section "9.3.8 BRE Expression
+ # Anchoring" that a dollar-sign at the end of the string means
+ # end-of-string.
+ #
+ # A dollar-sign followed by \) may or may not be an anchor.
+ # In all other cases the dollar is an ordinary character.
+ assert_identity 'yes' -e 's,$(var),$(var),'
+
+ # Since this dollar-sign may or may not be an anchor, treat the
+ # whole regular expression as not-an-identity.
+ #
+ # Since a regular expression with a subexpression must contain
+ # \( and \), it does not count as an identity substitution anyway,
+ # which makes the implementation simple.
+ assert_identity 'no' -e 's,aaa\(aaa$\),aaa\(aaa$\),'
+
+ assert_identity 'yes' -e 's,$a,$a,'
+ assert_identity 'no' -e 's,a$,a$,'
+
+ # Same for the circumflex.
+ assert_identity 'yes' -e 's,a^,a^,'
+ assert_identity 'no' -e 's,^a,^a,'
+ assert_identity 'no' -e 's,\(^aaa\)aaa,\(^aaa\)aaa,'
test_case_end
fi
Home |
Main Index |
Thread Index |
Old Index