pkgsrc-Changes archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

CVS commit: pkgsrc/mk



Module Name:    pkgsrc
Committed By:   jperkin
Date:           Tue Oct 22 06:29:21 UTC 2024

Modified Files:
        pkgsrc/mk: bsd.pkg.mk
        pkgsrc/mk/checksum: bsd.checksum-vars.mk checksum.awk checksum.mk
        pkgsrc/mk/fetch: bsd.fetch-vars.mk fetch.mk

Log Message:
mk: Add ARGMAX_REQ support for ARG_MAX limits.

The wip/grafana package now contains so many distfiles that any command
that operates on them now overflows ARG_MAX on NetBSD (262144).  This
causes problems both in how bmake handles executing shell commands, as
well as the checksum.awk script that executes digest on them.

Modify checksum.awk to support reading distfiles from stdin, and process
files in batches if a digest command is over 64K in length.  This value
strikes a good balance between performance and ensuring even the most
stringent modern ARG_MAX limit is avoided.

Introduce a new package variable ARGMAX_REQ that if set to a value higher
than ARG_MAX, and if we are unable to detect a bmake that has its own fix
for this problem, will use alternate targets that use temporary files for
input.

With this change, and ARGMAX_REQ=524288 set in wip/grafana, NetBSD's
native make is now able to both create and check its distinfo.

As an added benefit, on systems that do not breach the limit and are
running a newer bmake, the distinfo and makesum targets no longer use a
temporary input file.


To generate a diff of this commit:
cvs rdiff -u -r1.2056 -r1.2057 pkgsrc/mk/bsd.pkg.mk
cvs rdiff -u -r1.4 -r1.5 pkgsrc/mk/checksum/bsd.checksum-vars.mk
cvs rdiff -u -r1.3 -r1.4 pkgsrc/mk/checksum/checksum.awk
cvs rdiff -u -r1.32 -r1.33 pkgsrc/mk/checksum/checksum.mk
cvs rdiff -u -r1.28 -r1.29 pkgsrc/mk/fetch/bsd.fetch-vars.mk
cvs rdiff -u -r1.78 -r1.79 pkgsrc/mk/fetch/fetch.mk

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: pkgsrc/mk/bsd.pkg.mk
diff -u pkgsrc/mk/bsd.pkg.mk:1.2056 pkgsrc/mk/bsd.pkg.mk:1.2057
--- pkgsrc/mk/bsd.pkg.mk:1.2056 Sun Aug 25 06:19:03 2024
+++ pkgsrc/mk/bsd.pkg.mk        Tue Oct 22 06:29:21 2024
@@ -1,4 +1,4 @@
-#      $NetBSD: bsd.pkg.mk,v 1.2056 2024/08/25 06:19:03 wiz Exp $
+#      $NetBSD: bsd.pkg.mk,v 1.2057 2024/10/22 06:29:21 jperkin Exp $
 #
 # This file is in the public domain.
 #
@@ -53,7 +53,20 @@ _DEF_VARS.pkgname=   PKGBASE PKGVERSION PK
 PKG_FAIL_REASON+=      "Circular dependency detected"
 .endif
 
-####
+# Some packages are now hitting ARG_MAX limits as they contain thousands of
+# distfiles.  Versions of bmake prior to 20240711 cannot handle this, and so
+# temporary input files are used for various targets when the limit exceeds
+# ARG_MAX and an older version of bmake is detected.
+#
+# USE_TMPFILES is checked in various bsd.*-vars.mk so needs to be set early.
+#
+USE_TMPFILES=  no
+.if defined(ARGMAX_REQ) && ${MAKE_VERSION:U0} < 20240711
+ARGMAX_cmd=    getconf ARG_MAX || ${ECHO} 0
+.  if ${ARGMAX_cmd:sh} < ${ARGMAX_REQ:U0}
+USE_TMPFILES=  yes
+.  endif
+.endif
 
 ############################################################################
 # Allow various phases to define the default variables

Index: pkgsrc/mk/checksum/bsd.checksum-vars.mk
diff -u pkgsrc/mk/checksum/bsd.checksum-vars.mk:1.4 pkgsrc/mk/checksum/bsd.checksum-vars.mk:1.5
--- pkgsrc/mk/checksum/bsd.checksum-vars.mk:1.4 Fri Oct 11 12:53:13 2024
+++ pkgsrc/mk/checksum/bsd.checksum-vars.mk     Tue Oct 22 06:29:21 2024
@@ -1,4 +1,4 @@
-# $NetBSD: bsd.checksum-vars.mk,v 1.4 2024/10/11 12:53:13 jperkin Exp $
+# $NetBSD: bsd.checksum-vars.mk,v 1.5 2024/10/22 06:29:21 jperkin Exp $
 #
 # This Makefile fragment is included separately by bsd.pkg.mk and
 # defines some variables which must be defined earlier than where
@@ -8,6 +8,12 @@
 #
 #    DISTINFO_FILE is the path to file containing the checksums.
 #
+#    ARGMAX_REQ is the minimum ARG_MAX required to correctly create or
+#    validate all of the checksums.  In packages that have a large number
+#    (many thousands) of distfiles, this should be set to a power of two
+#    above the required minimum.  For example 524288 to indicate that the
+#    current NetBSD limit of 262144 is insufficient.
+#
 # Keywords: distinfo
 
 DISTINFO_FILE?=                ${PKGDIR}/distinfo
@@ -15,3 +21,15 @@ DISTINFO_FILE?=              ${PKGDIR}/distinfo
 .if !empty(_CKSUMFILES) && empty(TOOLS_PLATFORM.mktool)
 USE_TOOLS+=    digest:bootstrap
 .endif
+
+# If a package has a number of distfiles that may overflow ARG_MAX then use
+# temporary files, otherwise use pipes.  The temporary files need to live in
+# a known directory and cannot use WRKDIR for example because it is not
+# guaranteed to exist when the target is called.
+#
+.if ${USE_TMPFILES} == yes
+_CKSUMFILES_INPUT_cmd= ${MKTEMP} ${TMPDIR:U/tmp:Q}/pkgsrc.cksumfiles.XXXXXXXX
+USE_TOOLS+=            mktemp:bootstrap
+.else
+_CKSUMFILES_INPUT=     /dev/stdin
+.endif

Index: pkgsrc/mk/checksum/checksum.awk
diff -u pkgsrc/mk/checksum/checksum.awk:1.3 pkgsrc/mk/checksum/checksum.awk:1.4
--- pkgsrc/mk/checksum/checksum.awk:1.3 Wed Oct  7 18:09:52 2020
+++ pkgsrc/mk/checksum/checksum.awk     Tue Oct 22 06:29:21 2024
@@ -1,6 +1,6 @@
 #!/usr/bin/awk -f
 #
-# $NetBSD: checksum.awk,v 1.3 2020/10/07 18:09:52 jperkin Exp $
+# $NetBSD: checksum.awk,v 1.4 2024/10/22 06:29:21 jperkin Exp $
 #
 ###########################################################################
 #
@@ -47,6 +47,7 @@ BEGIN {
        # Retain output compatible with previous "checksum" shell script
        progname = "checksum"
 
+       arg_count = 0
        only_alg = ""
        distinfo = ""
        exitcode = 0
@@ -57,6 +58,11 @@ BEGIN {
                opt = ARGV[arg]
                if (opt == "-a") {
                        only_alg = ARGV[++arg]
+               } else if (opt == "-I") {
+                       infile = ARGV[++arg]
+                       while (getline < infile) {
+                               arg_list[arg_count++] = $0
+                       }
                } else if (opt == "-p") {
                        patch = 1
                } else if (opt == "-s") {
@@ -93,7 +99,11 @@ BEGIN {
        # in patch mode (-p).
        #
        while (arg < ARGC) {
-               distfile = ARGV[arg++]
+               arg_list[arg_count++] = ARGV[arg++]
+       }
+       i = 0
+       while (i < arg_count) {
+               distfile = arg_list[i++]
                sfile = distfile
                if (suffix) {
                        sfile = strip_suffix(sfile)
@@ -168,11 +178,29 @@ BEGIN {
                }
 
                #
+               # Due to command line argument limits we sometimes need to
+               # split commands up.  Store a count for each algorithm, which
+               # also serves as a way to iterate over all of the algorithms
+               # we've encountered.
+               #
+               if (!batch[algorithm]) {
+                       batch[algorithm] = 0
+               }
+
+               #
                # If not a patch file, then we're handling a distfile, where we
                # want to build a list of input files to digest(1) so they can
                # all be calculated in one go.
                #
-               distsums[algorithm] = sprintf("%s %s", distsums[algorithm],
+               # Increase the batch number if over 64K.  This is well below the
+               # limits seen in the wild (e.g. NetBSD at 256K), but there are
+               # very minimal improvements above this threshold in testing.
+               #
+               b = batch[algorithm]
+               if (length(distsums[algorithm,b]) > 65536) {
+                       batch[algorithm] = ++b
+               }
+               distsums[algorithm,b] = sprintf("%s %s", distsums[algorithm,b],
                    distfiles[distfile])
        }
        close(distinfo)
@@ -182,23 +210,26 @@ BEGIN {
        # pass them all to a single digest(1) command and parse the checksums
        # to be compared against distinfo.
        #
-       for (algorithm in distsums) {
-               cmd = sprintf("%s %s %s", DIGEST, algorithm,
-                   distsums[algorithm])
-               while ((cmd | getline) > 0) {
-                       # Should be unnecessary, but just in case.  If we want
-                       # to be really paranoid then test that $1 == algorithm.
-                       if (NF != 4) {
-                               continue
-                       }
-                       # strip "(filename)" -> "filename"
-                       distfile = substr($2, 2, length($2) - 2)
-                       if (suffix) {
-                               distfile = strip_suffix(distfile)
+       for (algorithm in batch) {
+               for (b = 0; b <= batch[algorithm]; b++) {
+                       cmd = sprintf("%s %s %s", DIGEST, algorithm,
+                           distsums[algorithm,b])
+                       while ((cmd | getline) > 0) {
+                               # Should be unnecessary, but just in case.  If
+                               # we want to be really paranoid then test that
+                               # $1 == algorithm.
+                               if (NF != 4) {
+                                       continue
+                               }
+                               # strip "(filename)" -> "filename"
+                               distfile = substr($2, 2, length($2) - 2)
+                               if (suffix) {
+                                       distfile = strip_suffix(distfile)
+                               }
+                               checksums[$1, distfile] = $4
                        }
-                       checksums[$1, distfile] = $4
+                       close(cmd)
                }
-               close(cmd)
        }
 
        #

Index: pkgsrc/mk/checksum/checksum.mk
diff -u pkgsrc/mk/checksum/checksum.mk:1.32 pkgsrc/mk/checksum/checksum.mk:1.33
--- pkgsrc/mk/checksum/checksum.mk:1.32 Fri Oct 11 11:40:29 2024
+++ pkgsrc/mk/checksum/checksum.mk      Tue Oct 22 06:29:21 2024
@@ -1,4 +1,4 @@
-# $NetBSD: checksum.mk,v 1.32 2024/10/11 11:40:29 jperkin Exp $
+# $NetBSD: checksum.mk,v 1.33 2024/10/22 06:29:21 jperkin Exp $
 #
 # See bsd.checksum.mk for helpful comments.
 #
@@ -39,11 +39,16 @@ checksum checksum-phase:
        @${DO_NADA}
 .else
 checksum checksum-phase:
+.  if ${USE_TMPFILES} == yes
+       ${_CKSUMFILES_INPUT::=${_CKSUMFILES_INPUT_cmd:sh}}
+.    for file in ${_CKSUMFILES}
+       @${ECHO} ${file} >> ${_CKSUMFILES_INPUT}
+.    endfor
        ${RUN} set -e;                                                  \
        case ${.TARGET:Q} in                                            \
        *-phase)        ${TEST} ! -f ${_COOKIE.checksum} || exit 0 ;;   \
        esac;                                                           \
-       if cd ${DISTDIR} && ${_CHECKSUM_CMD} ${DISTINFO_FILE:tA} ${_CKSUMFILES}; then \
+       if cd ${DISTDIR} && ${_CHECKSUM_CMD} -I ${_CKSUMFILES_INPUT} ${DISTINFO_FILE:tA}; then \
                ${TRUE};                                                \
        else                                                            \
                ${ERROR_MSG} "Make sure the Makefile and checksum file (${DISTINFO_FILE})"; \
@@ -51,6 +56,23 @@ checksum checksum-phase:
                ${ERROR_MSG} "\"${MAKE} NO_CHECKSUM=yes [other args]\"."; \
                exit 1;                                                 \
        fi
+       @${RM} -f ${_CKSUMFILES_INPUT}
+.  else
+       ${RUN} set -e;                                                  \
+       case ${.TARGET:Q} in                                            \
+       *-phase)        ${TEST} ! -f ${_COOKIE.checksum} || exit 0 ;;   \
+       esac;                                                           \
+       cd ${DISTDIR};                                                  \
+       if { ${_CKSUMFILES:@f@ ${ECHO} ${f};@} }                        \
+           | ${_CHECKSUM_CMD} -I ${_CKSUMFILES_INPUT} ${DISTINFO_FILE:tA}; then \
+               ${TRUE};                                                \
+       else                                                            \
+               ${ERROR_MSG} "Make sure the Makefile and checksum file (${DISTINFO_FILE})"; \
+               ${ERROR_MSG} "are up to date.  If you want to override this check, type"; \
+               ${ERROR_MSG} "\"${MAKE} NO_CHECKSUM=yes [other args]\"."; \
+               exit 1;                                                 \
+       fi
+.  endif
 .endif
 
 .if !empty(TOOLS_PLATFORM.mktool)
@@ -77,16 +99,30 @@ _DISTINFO_ARGS_COMMON+=     ${_PATCH_DIGEST_
 _DISTINFO_ARGS_PATCHSUM+=      ${PATCHDIR}/patch-*
 _DISTINFO_ARGS_PATCHSUM+=      ${PATCHDIR}/emul-*-patch-*
 
-_DISTINFO_INPUTFILE=           ${DISTINFO_FILE}.filelist
-
 distinfo:
-.for file in ${_CKSUMFILES}
-       @${ECHO} ${file} >> ${_DISTINFO_INPUTFILE}
-.endfor
+.  if ${USE_TMPFILES} == yes
+       ${_CKSUMFILES_INPUT::=${_CKSUMFILES_INPUT_cmd:sh}}
+.    for file in ${_CKSUMFILES}
+       @${ECHO} ${file} >> ${_CKSUMFILES_INPUT}
+.    endfor
        ${RUN}set -e;                                                   \
        newfile=${DISTINFO_FILE}.$$$$;                                  \
        if ${_DISTINFO_CMD} ${_DISTINFO_ARGS_COMMON}                    \
-               -I ${_DISTINFO_INPUTFILE} ${_DISTINFO_ARGS_PATCHSUM} > $$newfile;                               \
+               -I ${_CKSUMFILES_INPUT} ${_DISTINFO_ARGS_PATCHSUM} > $$newfile; \
+       then                                                            \
+               ${RM} -f $$newfile;                                     \
+               ${ECHO_MSG} "=> distinfo: unchanged.";                  \
+       else                                                            \
+               ${RM} -f ${DISTINFO_FILE};                              \
+               ${MV} -f $$newfile ${DISTINFO_FILE};                    \
+       fi
+       @${RM} -f ${_CKSUMFILES_INPUT}
+.  else
+       ${RUN}set -e;                                                   \
+       newfile=${DISTINFO_FILE}.$$$$;                                  \
+       if { ${_CKSUMFILES:@f@ ${ECHO} ${f};@} }        \
+           | ${_DISTINFO_CMD} ${_DISTINFO_ARGS_COMMON}                 \
+               -I ${_CKSUMFILES_INPUT} ${_DISTINFO_ARGS_PATCHSUM} > $$newfile; \
        then                                                            \
                ${RM} -f $$newfile;                                     \
                ${ECHO_MSG} "=> distinfo: unchanged.";                  \
@@ -94,16 +130,32 @@ distinfo:
                ${RM} -f ${DISTINFO_FILE};                              \
                ${MV} -f $$newfile ${DISTINFO_FILE};                    \
        fi
-       @rm ${_DISTINFO_INPUTFILE}
+.  endif
 
 makesum:
-.for file in ${_CKSUMFILES}
-       @${ECHO} ${file} >> ${_DISTINFO_INPUTFILE}
-.endfor
+.  if ${USE_TMPFILES} == yes
+       ${_CKSUMFILES_INPUT::=${_CKSUMFILES_INPUT_cmd:sh}}
+.    for file in ${_CKSUMFILES}
+       @${ECHO} ${file} >> ${_CKSUMFILES_INPUT}
+.    endfor
        ${RUN}set -e;                                                   \
        newfile=${DISTINFO_FILE}.$$$$;                                  \
        if ${_DISTINFO_CMD} ${_DISTINFO_ARGS_COMMON}                    \
-               -I ${_DISTINFO_INPUTFILE} > $$newfile;                  \
+               -I ${_CKSUMFILES_INPUT} > $$newfile;                    \
+       then                                                            \
+               ${RM} -f $$newfile;                                     \
+               ${ECHO_MSG} "=> distinfo: distfiles part unchanged.";   \
+       else                                                            \
+               ${RM} -f ${DISTINFO_FILE};                              \
+               ${MV} -f $$newfile ${DISTINFO_FILE};                    \
+       fi
+       @${RM} -f ${_CKSUMFILES_INPUT}
+.  else
+       ${RUN}set -e;                                                   \
+       newfile=${DISTINFO_FILE}.$$$$;                                  \
+       if { ${_CKSUMFILES:@f@ ${ECHO} ${f};@} }                        \
+           | ${_DISTINFO_CMD} ${_DISTINFO_ARGS_COMMON}                 \
+               -I ${_CKSUMFILES_INPUT} > $$newfile;                    \
        then                                                            \
                ${RM} -f $$newfile;                                     \
                ${ECHO_MSG} "=> distinfo: distfiles part unchanged.";   \
@@ -111,7 +163,7 @@ makesum:
                ${RM} -f ${DISTINFO_FILE};                              \
                ${MV} -f $$newfile ${DISTINFO_FILE};                    \
        fi
-       @rm ${_DISTINFO_INPUTFILE}
+.  endif
 
 makepatchsum:
        ${RUN}set -e;                                                   \

Index: pkgsrc/mk/fetch/bsd.fetch-vars.mk
diff -u pkgsrc/mk/fetch/bsd.fetch-vars.mk:1.28 pkgsrc/mk/fetch/bsd.fetch-vars.mk:1.29
--- pkgsrc/mk/fetch/bsd.fetch-vars.mk:1.28      Fri Oct 11 12:53:13 2024
+++ pkgsrc/mk/fetch/bsd.fetch-vars.mk   Tue Oct 22 06:29:21 2024
@@ -1,4 +1,4 @@
-# $NetBSD: bsd.fetch-vars.mk,v 1.28 2024/10/11 12:53:13 jperkin Exp $
+# $NetBSD: bsd.fetch-vars.mk,v 1.29 2024/10/22 06:29:21 jperkin Exp $
 #
 # This Makefile fragment is included separately by bsd.pkg.mk and
 # defines some variables which must be defined earlier than where
@@ -117,6 +117,18 @@ _FETCH_TOOLS.wget=         wget
 _FETCH_TOOLS.curl=             curl
 _FETCH_TOOLS.manual=           false
 
+# If a package has a number of distfiles that may overflow ARG_MAX then use
+# temporary files, otherwise use pipes.  The temporary files need to live in
+# a known directory and cannot use WRKDIR for example because it is not
+# guaranteed to exist when the target is called.
+#
+.if ${USE_TMPFILES} == yes
+_FETCHFILES_INPUT_cmd= ${MKTEMP} ${TMPDIR:U/tmp:Q}/pkgsrc.fetchfiles.XXXXXXXX
+USE_TOOLS+=            mktemp:bootstrap
+.else
+_FETCHFILES_INPUT=     /dev/stdin
+.endif
+
 .if !empty(_ALLFILES)
 USE_TOOLS+=    ${_FETCH_TOOLS.${FETCH_USING}:C/$/:bootstrap/}
 BOOTSTRAP_DEPENDS+=    ${_FETCH_DEPENDS.${FETCH_USING}}

Index: pkgsrc/mk/fetch/fetch.mk
diff -u pkgsrc/mk/fetch/fetch.mk:1.78 pkgsrc/mk/fetch/fetch.mk:1.79
--- pkgsrc/mk/fetch/fetch.mk:1.78       Mon Oct 14 08:02:40 2024
+++ pkgsrc/mk/fetch/fetch.mk    Tue Oct 22 06:29:21 2024
@@ -1,4 +1,4 @@
-# $NetBSD: fetch.mk,v 1.78 2024/10/14 08:02:40 jperkin Exp $
+# $NetBSD: fetch.mk,v 1.79 2024/10/22 06:29:21 jperkin Exp $
 
 .if empty(INTERACTIVE_STAGE:Mfetch) && empty(FETCH_MESSAGE:U)
 _MASTER_SITE_BACKUP=   ${MASTER_SITE_BACKUP:=${DIST_SUBDIR}${DIST_SUBDIR:D/}}
@@ -95,11 +95,25 @@ fetch: ${_FETCH_TARGETS}
 .  if !empty(_ALLFILES)
 do-fetch: ${_ALLFILES:S/^/${DISTDIR}\//}
 .    if ${FETCH_USING} == "mktool" && !empty(TOOLS_PLATFORM.mktool)
+.      if ${USE_TMPFILES} == yes
+       ${_FETCHFILES_INPUT::=${_FETCHFILES_INPUT_cmd:sh}}
+.        for file in ${_ALLFILES}
+       ${RUN}                                                          \
+       unsorted_sites="${SITES.${file:T}}";                            \
+       sites="${_ORDERED_SITES} ${_MASTER_SITE_BACKUP}";               \
+       ${ECHO} ${file} ${DISTDIR} $$sites >>${_FETCHFILES_INPUT}
+.        endfor
+       @${TOOLS_PLATFORM.mktool} fetch -I ${_FETCHFILES_INPUT}         \
+               ${_MKTOOL_FETCH_ARGS}
+       @${RM} -f ${_FETCHFILES_INPUT}
+.      else
        @{ ${_ALLFILES:@file@                                           \
                unsorted_sites="${SITES.${file:T}}";                    \
                sites="${_ORDERED_SITES} ${_MASTER_SITE_BACKUP}";       \
                echo ${file} ${DISTDIR} $$sites;                        \
-       @} } | ${TOOLS_PLATFORM.mktool} fetch -I - ${_MKTOOL_FETCH_ARGS}
+       @} } | ${TOOLS_PLATFORM.mktool} fetch -I ${_FETCHFILES_INPUT}   \
+               ${_MKTOOL_FETCH_ARGS}
+.      endif
 .    else
        @${DO_NADA}
 .    endif



Home | Main Index | Thread Index | Old Index