Subject: pkg/34695: Master sites list randomization
To: None <pkg-manager@netbsd.org, gnats-admin@netbsd.org,>
From: None <mishka@netbsd.org>
List: pkgsrc-bugs
Date: 10/02/2006 14:40:00
>Number:         34695
>Category:       pkg
>Synopsis:       Master sites list randomization
>Confidential:   no
>Severity:       non-critical
>Priority:       medium
>Responsible:    pkg-manager
>State:          open
>Class:          change-request
>Submitter-Id:   net
>Arrival-Date:   Mon Oct 02 14:40:00 +0000 2006
>Originator:     Mike M. Volokhov
>Release:        pkgsrc-current as of 20060902
>Organization:
	NONE
>Environment:
No matter
>Description:

	Hello!

	As you may have noticed, the static list of package master
	sites comes with some known problems. The most annoying are
	the huge load that pkgsrc users put on the sites at the top
	of the list, and dead sites (also at the top of the list).

>How-To-Repeat:
	
	make fetch

>Fix:

	At the moment you can reorder master sites with MASTER_SORT
	and MASTER_SORT_REGEX. With the newly implemented
	MASTER_SORT_RANDOM=yes|no feature you can add some extra
	randomization. It randomizes the sites just before any other
	ordering is applied, so if you are already using some sorting,
	your sites will be reshuffled within each group of your
	selection.  To randomize the whole list, use
	MASTER_SORT_RANDOM=yes only.

	To see the randomization in action, just add ``| tee /tmp/sites''
	to the _RAND_SITES_CMD variable.

	Also, I'd like to move _MASTER_SITE_BACKUP out of the unsorted
	list and append it after the sorted one, so that those sites
	are all used more as backup sites.

	The patch:

Index: defaults/mk.conf
===================================================================
RCS file: /cvsroot/pkgsrc/mk/defaults/mk.conf,v
retrieving revision 1.131
diff -u -r1.131 mk.conf
--- defaults/mk.conf	27 Sep 2006 15:18:15 -0000	1.131
+++ defaults/mk.conf	2 Oct 2006 14:14:45 -0000
@@ -397,6 +397,16 @@
 # Possible: Regexps as in awk(1)
 # Default: none
 
+.if defined(PKG_DEVELOPER)
+MASTER_SORT_RANDOM?=	NO
+.else
+MASTER_SORT_RANDOM?=	YES
+.endif
+# If set to YES or yes, a list of master sites will be randomly intermixed.
+# Also, both MASTER_SORT and MASTER_SORT_REGEX may be applied later.
+# Possible: yes, no / not defined
+# Default: NO if PKG_DEVELOPER is defined, YES otherwise
+
 #PATCH_DEBUG=
 # Used to debug patches as they are applied
 # Possible: defined, not defined
Index: fetch/fetch.mk
===================================================================
RCS file: /cvsroot/pkgsrc/mk/fetch/fetch.mk,v
retrieving revision 1.20
diff -u -r1.20 fetch.mk
--- fetch/fetch.mk	27 Jul 2006 18:48:03 -0000	1.20
+++ fetch/fetch.mk	2 Oct 2006 14:14:45 -0000
@@ -30,10 +30,20 @@
 _BUILD_DEFS+=	_DISTFILES _PATCHFILES
 
 # Set up _ORDERED_SITES to work out the exact list of sites for every file,
-# using the dynamic sites script, or sorting according to the master site
-# list or the patterns in MASTER_SORT or MASTER_SORT_REGEX as appropriate.
+# using the dynamic sites script, or ordering according to the master site
+# list, MASTER_SORT_RANDOM randomization feature, or the patterns in
+# MASTER_SORT or MASTER_SORT_REGEX as appropriate.
 # No actual sorting is done until _ORDERED_SITES is expanded.
 #
+.if defined(MASTER_SORT_RANDOM) && !empty(MASTER_SORT_RANDOM:M[yY][eE][sS])
+_MASTER_RAND_AWK= { srand(systime()); n = split($$0, site); \
+		    for (i = n; i > 0; i--) { \
+			ir = int(rand() * i + 1); \
+			t = site[i]; site[i] = site[ir]; site[ir] = t; \
+			print site[i]; } }
+_RAND_SITES_CMD= | ${AWK} '${_MASTER_RAND_AWK}'
+.endif
+
 .if defined(MASTER_SORT) || defined(MASTER_SORT_REGEX)
 MASTER_SORT?=
 MASTER_SORT_REGEX?=
@@ -44,9 +54,12 @@
 _MASTER_SORT_AWK+= /${srt:C/\//\\\//g}/ { good["${srt:S/\\/\\\\/g}"] = good["${srt:S/\\/\\\\/g}"] " " $$0 ; next; }
 .  endfor
 _MASTER_SORT_AWK+= { rest = rest " " $$0; } END { n=split(gl, gla); for(i=1;i<=n;i++) { print good[gla[i]]; } print rest; }
+_SORT_SITES_CMD+= | ${AWK} '${_MASTER_SORT_AWK}'
+.endif
 
-_SORT_SITES_CMD= ${ECHO} $$unsorted_sites | ${AWK} '${_MASTER_SORT_AWK}'
-_ORDERED_SITES= ${_MASTER_SITE_OVERRIDE} `${_SORT_SITES_CMD:S/\\/\\\\/g:C/"/\"/g}`
+.if defined(_RAND_SITES_CMD) || defined(_SORT_SITES_CMD)
+_SORT_SITES_FULL_CMD= ${ECHO} $$unsorted_sites ${_RAND_SITES_CMD} ${_SORT_SITES_CMD}
+_ORDERED_SITES= ${_MASTER_SITE_OVERRIDE} `${_SORT_SITES_FULL_CMD:S/\\/\\\\/g:C/"/\"/g}`
 .else
 _ORDERED_SITES= ${_MASTER_SITE_OVERRIDE} $$unsorted_sites
 .endif
@@ -245,8 +258,8 @@
 		fi;							\
 	done
 	${_PKG_SILENT}${_PKG_DEBUG}set -e;				\
-	unsorted_sites="${SITES.${.TARGET:T:S/=/--/}} ${_MASTER_SITE_BACKUP}"; \
-	sites="${_ORDERED_SITES}";					\
+	unsorted_sites="${SITES.${.TARGET:T:S/=/--/}}";			\
+	sites="${_ORDERED_SITES} ${_MASTER_SITE_BACKUP}";		\
 	cd ${.TARGET:H:S/\/${DIST_SUBDIR}$//} &&			\
 	${_FETCH_CMD} ${_FETCH_ARGS} ${.TARGET:T} $$sites
 	${_PKG_SILENT}${_PKG_DEBUG}					\