Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/netbsd-7]: src/distrib/sets Pull up following revision(s) (requested by ...



details:   https://anonhg.NetBSD.org/src/rev/fa89ec9272d6
branches:  netbsd-7
changeset: 798458:fa89ec9272d6
user:      martin <martin%NetBSD.org@localhost>
date:      Fri Oct 24 07:30:14 2014 +0000

description:
Pull up following revision(s) (requested by apb in ticket #155):
        distrib/sets/join.awk: revision 1.4
        distrib/sets/join.awk: revision 1.5
Add an unvis function and use it on file names in mtree specs.
This should correct the problem that ./bin/[ was missing from the
base.tgz set, despite being listed in src/distrib/sets/base/mi
and being present in METALOG.  The corresponding entry in
METALOG.sanitised has ./bin/\133 instead of ./bin/[, and that made
join.awk omit it.
XXX: The unvis() implementation in join.awk handles only a subset
of the syntax, but it's probably good enough for now.
XXX: The file names should probably be canonicalised by
vis(unvis(name)), but at present none of the file names in the set
lists really need it.
XXX: It may be a bug that entries in the set lists without
corresponding entries in METALOG are silently ignored by join.awk.
Add vis() function, and canonicalise file names via vis(unvis($1)).
XXX: The vis() function is very limited, due to the absence of ord()
in NetBSD's awk.

diffstat:

 distrib/sets/join.awk |  105 ++++++++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 102 insertions(+), 3 deletions(-)

diffs (130 lines):

diff -r 0d4c7132feeb -r fa89ec9272d6 distrib/sets/join.awk
--- a/distrib/sets/join.awk     Fri Oct 24 07:28:14 2014 +0000
+++ b/distrib/sets/join.awk     Fri Oct 24 07:30:14 2014 +0000
@@ -1,4 +1,4 @@
-#      $NetBSD: join.awk,v 1.3 2008/04/30 13:10:49 martin Exp $
+#      $NetBSD: join.awk,v 1.3.42.1 2014/10/24 07:30:14 martin Exp $
 #
 # Copyright (c) 2002 The NetBSD Foundation, Inc.
 # All rights reserved.
@@ -30,7 +30,102 @@
 # join.awk F1 F2
 #      Similar to join(1), this reads a list of words from F1
 #      and outputs lines in F2 with a first word that is in F1.
-#      Neither file needs to be sorted
+#      The first word is canonicalised via vis(unvis(word)).
+#      Neither file needs to be sorted.
+
+function unvis(s) \
+{
+       # Decode s: "\\" -> "\" and "\ooo" (three octal digits) -> that character.  XXX: no other vis encodings are handled.
+       unvis_result = ""
+       while (length(s) > 0) {
+               unvis_pos = match(s, "\\\\.")
+               if (unvis_pos == 0) {
+                       unvis_result = unvis_result "" s
+                       s = ""
+                       break
+               }
+               # copy everything before the next backslash unchanged, and drop it from s
+               unvis_result = unvis_result "" substr(s, 1, unvis_pos - 1)
+               s = substr(s, unvis_pos)
+               # s now starts with a backslash; decode the escape it introduces
+               if (substr(s, 1, 2) == "\\\\") {
+                       # escaped (double) backslash -> one literal backslash
+                       unvis_result = unvis_result "\\"
+                       s = substr(s, 3)
+               } else if (match(s, "\\\\[0-7][0-7][0-7]") == 1) {
+                       # \ooo: turn the three octal digits into the character with that code.
+                       # XXX: use strtonum() when that is available
+                       unvis_result = unvis_result "" sprintf("%c", \
+                               0+substr(s, 2, 1) * 64 + \
+                               0+substr(s, 3, 1) * 8 + \
+                               0+substr(s, 4, 1))
+                       s = substr(s, 5)
+               } else {
+                       # unrecognised escape: warn on stderr, keep the literal backslash, resume after it
+                       printf "%s: %s:%s: unrecognised escape %s\n", \
+                               ARGV[0], (FILENAME ? FILENAME : "stdin"), FNR, \
+                               substr(s, 1, 2) \
+                               >"/dev/stderr"
+                       unvis_result = unvis_result "" substr(s, 1, 1)
+                       s = substr(s, 2)
+               }
+       }
+       return unvis_result
+}
+
+function vis(s) \
+{
+       # Encode backslash as \\, space as \040 and tab as \011,
+       # because those characters would interfere with scripts that
+       # attempt to manipulate the set files.
+       #
+       # We make no attempt to encode shell special characters
+       # such as " ' $ ( ) { } [ ] < > * ?, because nothing that
+       # parses set files would need that.
+       #
+       # We would like to handle other white space or non-graph
+       # characters, because they may be confusing for human readers,
+       # but they are too difficult to handle in awk without the ord()
+       # function, so we print an error message and copy them unchanged.
+       #
+       # As of October 2014, no files in the set lists contain
+       # characters that would need any kind of encoding.
+       #
+       vis_result = ""
+       while (length(s) > 0) {
+               vis_pos = match(s, "(\\\\|[[:space:]]|[^[:graph:]])")
+               if (vis_pos == 0) {
+                       vis_result = vis_result "" s
+                       s = ""
+                       break
+               }
+               # copy everything before the next special char unchanged
+               vis_result = vis_result "" substr(s, 1, vis_pos - 1)
+               vis_char = substr(s, vis_pos, 1)
+               s = substr(s, vis_pos + 1)
+               # encode the special char itself; s already points past it
+               if (vis_char == "\\") {
+                       # backslash -> double backslash
+                       vis_result = vis_result "\\\\"
+               } else if (vis_char == " ") {
+                       # space -> \040
+                       vis_result = vis_result "\\040"
+               } else if (vis_char == "\t") {
+                       # tab -> \011
+                       vis_result = vis_result "\\011"
+               } else {
+                       # anything else would need a generalised \ooo encoding;
+                       # XXX: I don't know how to do this in awk without ord()
+                       printf "%s: %s:%s: cannot perform vis encoding\n", \
+                               ARGV[0], (FILENAME ? FILENAME : "stdin"), FNR \
+                               >"/dev/stderr"
+                       vis_result = vis_result "" vis_char
+               }
+       }
+       return vis_result
+}
+
+// { $1 = vis(unvis($1)); print }  # empty regex matches every record: canonicalise $1, then print it.  NOTE(review): prints unconditionally -- confirm this is intended
 
 BEGIN \
 {
@@ -38,11 +133,15 @@
                printf("Usage: join file1 file2\n") >"/dev/stderr"
                exit 1
        }
-       while ( (getline < ARGV[1]) > 0)
+       while ( (getline < ARGV[1]) > 0) {
+               $1 = vis(unvis($1))
                words[$1] = $0
+       }
        delete ARGV[1]
 }
 
+// { $1 = vis(unvis($1)) }  # empty regex matches every record: canonicalise $1 before the "$1 in words" test below
+
 $1 in words \
 {
        f1=$1



Home | Main Index | Thread Index | Old Index