pkgsrc-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[pkgsrc/trunk]: pkgsrc/meta-pkgs/nltk_data nltk_data: add shared files for nl...



details:   https://anonhg.NetBSD.org/pkgsrc/rev/47f81d7eee41
branches:  trunk
changeset: 770085:47f81d7eee41
user:      wiz <wiz%pkgsrc.org@localhost>
date:      Wed Nov 24 15:56:18 2021 +0000

description:
nltk_data: add shared files for nltk_data packages

This also includes a tool to create these packages.

diffstat:

 meta-pkgs/nltk_data/common.mk |  24 +++++++++++++++++++++
 meta-pkgs/nltk_data/howto.md  |  21 ++++++++++++++++++
 meta-pkgs/nltk_data/split.py  |  49 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 94 insertions(+), 0 deletions(-)

diffs (106 lines):

diff -r c6c992770105 -r 47f81d7eee41 meta-pkgs/nltk_data/common.mk
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/meta-pkgs/nltk_data/common.mk     Wed Nov 24 15:56:18 2021 +0000
@@ -0,0 +1,24 @@
+# $NetBSD: common.mk,v 1.1 2021/11/24 15:56:18 wiz Exp $
+# Shared by the nltk_data-* packages; each sets TYPE before including this.
+MASTER_SITES=  https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/${TYPE}/
+EXTRACT_SUFX?= .zip
+
+MAINTAINER?=   pkgsrc-users%NetBSD.org@localhost
+HOMEPAGE?=     https://www.nltk.org/data.html
+COMMENT?=      Natural Language Toolkit (NLTK) Data
+
+INSTALLATION_DIRS+=    share/nltk_data/${TYPE}
+# UNPACK "no": install the distfile itself; else copy ${DISTNAME} from ${WRKDIR}.
+UNPACK?=       no
+# Nothing to build: the install rules below only copy files.
+do-build:
+
+.if ${UNPACK} == "no"
+do-install:
+       ${INSTALL_DATA} ${_DISTDIR}/${DISTNAME}${EXTRACT_SUFX} ${DESTDIR}${PREFIX}/share/nltk_data/${TYPE}
+.else
+USE_TOOLS+=    pax
+# pax -rw is copy mode; -pp keeps the file mode bits of the copied files.
+do-install:
+       cd ${WRKDIR} && ${PAX} -pp -rw ${DISTNAME} ${DESTDIR}${PREFIX}/share/nltk_data/${TYPE}/
+.endif
diff -r c6c992770105 -r 47f81d7eee41 meta-pkgs/nltk_data/howto.md
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/meta-pkgs/nltk_data/howto.md      Wed Nov 24 15:56:18 2021 +0000
@@ -0,0 +1,21 @@
+# Sources
+
+Fetch https://www.nltk.org/nltk_data/ which is an XML file with an XSL
+stylesheet
+
+    wget -O nltk_data.xml  https://www.nltk.org/nltk_data/
+
+should work.
+This file contains one line per data package (108 entries as of 2021-11-24)
+and some meta package information.
+
+# Generating the packages
+
+Update the date in `split.py` and run it in the directory holding `nltk_data.xml`:
+
+       split.py
+       
+It will generate one package for each entry in the list in textproc/nltk_data-${id}.
+You'll then need to run 'make mdi' in each directory. If the package existed
+before, make sure that the data really changed (distinfo checksums/size differ)
+before committing.
diff -r c6c992770105 -r 47f81d7eee41 meta-pkgs/nltk_data/split.py
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/meta-pkgs/nltk_data/split.py      Wed Nov 24 15:56:18 2021 +0000
@@ -0,0 +1,49 @@
+#!/usr/bin/env python3
+
+import os
+import xml.etree.ElementTree as ET
+
+tree = ET.parse('nltk_data.xml')
+
+root = tree.getroot()
+
+for child in root[0]:
+    id = child.attrib["id"]
+    path = f"/usr/pkgsrc/textproc/nltk_data-{id}"
+    try:
+        os.mkdir(path)
+    except Exception:
+        pass
+    name = child.attrib["name"]
+    if "webpage" in child.attrib:
+        webpage = "HOMEPAGE=\t" + child.attrib["webpage"]
+    else:
+        webpage = ""
+    if "license" in child.attrib:
+        license = child.attrib["license"]
+    subdir = child.attrib["subdir"]
+    url = child.attrib["url"]
+    with open(path + "/Makefile", "w") as f:
+        print(f"""# $NetBSD: split.py,v 1.1 2021/11/24 15:56:18 wiz Exp $
+
+DISTNAME=      {id}
+PKGNAME=       nltk_data-{id}-20211124
+CATEGORIES=    textproc
+DIST_SUBDIR=   ${{PKGNAME_NOREV}}
+
+{webpage}
+COMMENT=       NLTK Data - {name}
+#LICENSE=      {license}
+
+TYPE=          {subdir}
+
+.include "../../meta-pkgs/nltk_data/common.mk"
+.include "../../mk/bsd.pkg.mk"
+""", file=f, end='')
+    with open(path + "/DESCR", "w") as f:
+        print(f"""This package contains data for NLTK, the Natural Language Toolkit.
+
+This package contains data from/for {name}.""", file=f)
+    with open(path + "/PLIST", "w") as f:
+        print(f"""@comment $NetBSD: split.py,v 1.1 2021/11/24 15:56:18 wiz Exp $
+share/nltk/{subdir}/{id}.zip""", file=f)



Home | Main Index | Thread Index | Old Index