pkgsrc-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[pkgsrc/trunk]: pkgsrc/pkgtools pkgtools/distlint: add early prototype implementation
details: https://anonhg.NetBSD.org/pkgsrc/rev/c8451f5f4ed1
branches: trunk
changeset: 384791:c8451f5f4ed1
user: rillig <rillig%pkgsrc.org@localhost>
date: Sun Sep 04 21:38:47 2022 +0000
description:
pkgtools/distlint: add early prototype implementation
Do not enable the package yet as it is still under active development
and lacking proper documentation.
diffstat:
pkgtools/Makefile | 3 +-
pkgtools/distlint/DESCR | 2 +
pkgtools/distlint/Makefile | 31 ++++
pkgtools/distlint/PLIST | 2 +
pkgtools/distlint/files/distlint.py | 259 ++++++++++++++++++++++++++++++++++++
5 files changed, 296 insertions(+), 1 deletions(-)
diffs (truncated from 327 to 300 lines):
diff -r bcda69e42281 -r c8451f5f4ed1 pkgtools/Makefile
--- a/pkgtools/Makefile Sun Sep 04 21:03:43 2022 +0000
+++ b/pkgtools/Makefile Sun Sep 04 21:38:47 2022 +0000
@@ -1,4 +1,4 @@
-# $NetBSD: Makefile,v 1.122 2022/06/07 09:54:01 jperkin Exp $
+# $NetBSD: Makefile,v 1.123 2022/09/04 21:38:47 rillig Exp $
#
COMMENT= Tools for use in the packages collection
@@ -16,6 +16,7 @@
SUBDIR+= dfdisk
SUBDIR+= digest
SUBDIR+= distbb
+#SUBDIR+= distlint # early prototype
SUBDIR+= genpkgng
SUBDIR+= genrpm
SUBDIR+= gensolpkg
diff -r bcda69e42281 -r c8451f5f4ed1 pkgtools/distlint/DESCR
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pkgtools/distlint/DESCR Sun Sep 04 21:38:47 2022 +0000
@@ -0,0 +1,2 @@
+Distlint verifies that the distfiles that are uploaded to a server
+satisfy the requirements of the packages.
diff -r bcda69e42281 -r c8451f5f4ed1 pkgtools/distlint/Makefile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pkgtools/distlint/Makefile Sun Sep 04 21:38:47 2022 +0000
@@ -0,0 +1,31 @@
+# $NetBSD: Makefile,v 1.1 2022/09/04 21:38:47 rillig Exp $
+
+PKGNAME= distlint-22.09.04
+CATEGORIES= pkgtools
+
+MAINTAINER= pkgsrc-users%NetBSD.org@localhost
+HOMEPAGE= https://www.pkgsrc.org/
+COMMENT= Verify distfiles against license requirements
+LICENSE= modified-bsd
+
+WRKSRC= ${WRKDIR}
+AUTO_MKDIRS= yes
+
+REPLACE_PYTHON= distlint.py
+
+SUBST_CLASSES+= dirs
+SUBST_STAGE.dirs= pre-configure
+SUBST_FILES.dirs= distlint.py
+SUBST_VARS.dirs= PREFIX
+
+do-extract:
+ cp ${FILESDIR}/distlint.py ${WRKSRC}
+
+do-build:
+ # Nothing
+
+do-install:
+ ${INSTALL_SCRIPT} ${WRKSRC}/distlint.py ${DESTDIR}${PREFIX}/bin/distlint
+
+.include "../../lang/python/application.mk"
+.include "../../mk/bsd.pkg.mk"
diff -r bcda69e42281 -r c8451f5f4ed1 pkgtools/distlint/PLIST
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pkgtools/distlint/PLIST Sun Sep 04 21:38:47 2022 +0000
@@ -0,0 +1,2 @@
+@comment $NetBSD: PLIST,v 1.1 2022/09/04 21:38:47 rillig Exp $
+bin/distlint
diff -r bcda69e42281 -r c8451f5f4ed1 pkgtools/distlint/files/distlint.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pkgtools/distlint/files/distlint.py Sun Sep 04 21:38:47 2022 +0000
@@ -0,0 +1,259 @@
+#! /usr/bin/env python
+# $NetBSD: distlint.py,v 1.1 2022/09/04 21:38:48 rillig Exp $
+
+import argparse
+import os
+import re
+from pathlib import Path
+from typing import NamedTuple, List, Optional, Set, Dict
+from logging import debug
+
+
+pkg_info = '@PREFIX@/sbin/pkg_info'
+
+
+class DistinfoEntry(NamedTuple):
+ """
+ One distfile from a distinfo file, including its hashes.
+ """
+ distfile: Path
+ hashes: Dict[str, str]
+
+ loc_pkgsrc_dir: Path
+ loc_distinfo: Path
+
+
+class BinaryPackage(NamedTuple):
+ """
+ A file containing a binary package for a particular platform.
+ """
+
+ file: Path
+ pkgname: str
+ build_info: Dict[str, str]
+ distinfo_entries: List[DistinfoEntry]
+
+ def load_build_info(self) -> Dict[str, str]:
+ if len(self.build_info) > 0:
+ return self.build_info
+
+ slash_file = str(self.file).replace('\\', '/')
+ for line in os.popen(f'{pkg_info} -q -B "{slash_file}"').readlines():
+ line = line.rstrip()
+ if line == '':
+ continue
+ key, value = line.split('=', 1)
+ self.build_info[key] = value
+
+ return self.build_info
+
+ def no_src_on_ftp(self) -> str:
+ return self.load_build_info().get('NO_SRC_ON_FTP', '')
+
+ def license(self) -> str:
+ return self.load_build_info().get('LICENSE', '')
+
+ def load_distinfo_entries(self, pkgsrc_dirs: List[Path]) -> List[DistinfoEntry]:
+ if len(self.distinfo_entries) > 0:
+ return self.distinfo_entries
+
+ distinfo_files = []
+ for line in os.popen(f'{pkg_info} -q -b "{self.file}"').readlines():
+ m = re.match(r'(?x) ([^\s:]+): \t \$NetBSD: .*?,v \s+ ([0-9.]+)', line)
+ if m and f'./{m.group(1)}'.endswith('/distinfo'):
+ distinfo_files.append(Path(m.group(1)))
+
+ #print(f'package {self.pkgname} has distinfo files: {distinfo_files}')
+
+ for distinfo_file in distinfo_files:
+ for pkgsrc_dir in pkgsrc_dirs:
+ distinfo = pkgsrc_dir / distinfo_file
+ #print(f'distinfo {distinfo}')
+ if distinfo.exists():
+ #print(f'exists')
+ for distinfo_entry in Distdir.load_distinfo(pkgsrc_dir, distinfo):
+ #print(f'entry {distinfo_entry}')
+ self.distinfo_entries.append(distinfo_entry)
+
+ return self.distinfo_entries
+
+
+class Distdir(NamedTuple):
+ """
+ A directory containing distribution files, such as /pub/pkgsrc/distfiles.
+
+ Each such directory is connected to one or more directories containing
+ binary packages, such as:
+ /pub/pkgsrc/packages/NetBSD/amd64/2022Q4/All
+ /pub/pkgsrc/packages/Linux/sparc/HEAD/All
+
+ Each such directory is connected to one or more pkgsrc trees, such as:
+ /usr/pkgsrc-trunk
+ /usr/pkgsrc-2022Q4
+ """
+
+ distdir: Path
+ database: Path
+ packages_dirs: List[Path]
+ pkgsrc_dirs: List[Path]
+
+ def validate(self):
+ distfiles = self.load_distfiles()
+ distinfo_entries = {} # XXX: self.load_pkgsrc()
+ binary_packages = self.load_binary_packages()
+
+ for distfile in distfiles:
+ debug('distfile %s', distfile)
+ for binary_package in binary_packages:
+ debug('%s', binary_package)
+ for distinfo_entry in sorted(distinfo_entries.values(), key=lambda e: e[0].distfile):
+ debug('%s', distinfo_entry)
+
+ self.validate_no_src_on_ftp(distfiles, binary_packages)
+
+ print("TODO: validate that GPL files are still there even after removing the binary package")
+
+ def validate_no_src_on_ftp(self, distfiles: List[Path], binary_packages: List[BinaryPackage]):
+ for binary_package in binary_packages:
+ for distinfo_entry in binary_package.load_distinfo_entries(self.pkgsrc_dirs):
+ if binary_package.no_src_on_ftp() != '':
+ if (self.distdir / distinfo_entry.distfile).exists():
+ print(f"error: '{distinfo_entry.distfile}' must not be in '{self.distdir}' because the package '{binary_package.pkgname}' has NO_SRC_ON_FTP '{binary_package.no_src_on_ftp()}'.")
+
+ def load_distfiles(self):
+ distfiles: Set[Path] = set()
+ for root, dirs, files in os.walk(self.distdir):
+ for file in files:
+ distfiles.add(Path(root, file).relative_to(self.distdir))
+ return sorted(distfiles)
+
+ def load_binary_packages(self) -> List[BinaryPackage]:
+ binary_package_files: Set[Path] = set()
+ for packages_dir in self.packages_dirs:
+ for root, _, files in os.walk(packages_dir):
+ for file in files:
+ if file.endswith('.tgz'):
+ binary_package_files.add(Path(root, file))
+
+ binary_packages = []
+ for binary_package_file in binary_package_files:
+ binary_package = self.load_binary_package(binary_package_file)
+ if binary_package:
+ binary_packages.append(binary_package)
+
+ return sorted(binary_packages, key=lambda p: p.pkgname)
+
+ @staticmethod
+ def load_binary_package(file: Path) -> Optional[BinaryPackage]:
+ return BinaryPackage(file, file.name[:-len('.tgz')], {}, [])
+
+ @staticmethod
+ def list_distinfo_files(pkgsrc_dir: Path):
+ for root, _, files in os.walk(pkgsrc_dir):
+ for file in files:
+ if file == 'distinfo':
+ yield Path(root, file).relative_to(pkgsrc_dir)
+
+ def load_pkgsrc(self) -> Dict[Path, List[DistinfoEntry]]:
+ info: Dict[Path, List[DistinfoEntry]] = {}
+
+ for pkgsrc_dir in self.pkgsrc_dirs:
+ for file in self.list_distinfo_files(pkgsrc_dir):
+ for entry in self.load_distinfo(pkgsrc_dir, file):
+ if entry.distfile not in info:
+ info[entry.distfile] = list()
+ info[entry.distfile].append(entry)
+
+ return info
+
+ @staticmethod
+ def load_distinfo(pkgsrc_dir: Path, distinfo: Path) -> List[DistinfoEntry]:
+ by_path: Dict[Path, DistinfoEntry] = {}
+
+ with open(Path(pkgsrc_dir, distinfo), 'r') as f:
+ for line in f:
+ m = re.match(r'(?x) (\S+) \s \( (\S+) \) \s = \s (\S+)', line)
+ if m:
+ algorithm, path, value = m.groups()
+ path = Path(path)
+ if path not in by_path:
+ by_path[path] = DistinfoEntry(path, {}, pkgsrc_dir, distinfo)
+ by_path[path].hashes[algorithm] = value
+ return list(by_path.values())
+
+
+def load_configuration(file: str) -> List[Distdir]:
+ distdirs: List[Distdir] = []
+
+ with open(file) as f:
+ distdir: Optional[Path] = None
+ database: Optional[Path] = None
+ packages_dirs = []
+ pkgsrc_dirs = []
+
+ lineno = 0
+ for line in f:
+ line = line.strip()
+ lineno += 1
+
+ if line == '' or line.startswith('#'):
+ continue
+
+ words = line.split()
+ if len(words) != 2:
+ raise ValueError(f'{file}:{lineno}: invalid line')
+ cmd, arg = words
+
+ if cmd == 'distdir':
+ if distdir is not None:
+ distdirs.append(Distdir(distdir, database, packages_dirs, pkgsrc_dirs))
+ database = None
+ packages_dirs = []
+ pkgsrc_dirs = []
+ distdir = Path(words[1])
+ if not distdir.exists():
+ raise ValueError(f"{file}:{lineno}: '{distdir}' must be a directory")
+ continue
+
+ if distdir is None:
+ raise ValueError(f"{file}:{lineno}: '{words[0]}' requires a preceding 'distdir'")
+
+ if cmd == 'database':
+ database = Path(arg)
+ elif cmd == 'packages':
+ packages = Path(arg)
+ if not packages.exists():
+ raise ValueError(f"{file}:{lineno}: '{packages}' must be a directory")
+ packages_dirs.append(packages)
+ elif cmd == 'pkgsrc':
+ pkgsrc = Path(arg)
+ if not pkgsrc.exists():
+ raise ValueError(f"{file}:{lineno}: '{pkgsrc}' must be a directory")
+ pkgsrc_dirs.append(pkgsrc)
Home | Main Index | Thread Index | Old Index