pkgsrc-Changes archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
CVS commit: pkgsrc/textproc/split-thai
Module Name: pkgsrc
Committed By: scole
Date: Wed Jul 17 17:32:16 UTC 2024
Modified Files:
pkgsrc/textproc/split-thai: DESCR Makefile PLIST distinfo
Log Message:
Update to version 2.29
- add 'pthai-copy-word-at-point and 'pthai-copy-string-at-point
- remove all external-program word splitting functionality associated
with icu and swath
- for word splitting, use only internal elisp functions
- remove 'pthai-twt-lock and 'pthai-twt-splitter-enable; this results in
the 'thai-word-table always being modified when adding or removing words
- rename pthai-splitter-max-recurse-word-length to pthai-rwb-max-word-length
- remove pthai-temperature-* functions
To generate a diff of this commit:
cvs rdiff -u -r1.7 -r1.8 pkgsrc/textproc/split-thai/DESCR \
pkgsrc/textproc/split-thai/PLIST
cvs rdiff -u -r1.56 -r1.57 pkgsrc/textproc/split-thai/Makefile
cvs rdiff -u -r1.33 -r1.34 pkgsrc/textproc/split-thai/distinfo
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: pkgsrc/textproc/split-thai/DESCR
diff -u pkgsrc/textproc/split-thai/DESCR:1.7 pkgsrc/textproc/split-thai/DESCR:1.8
--- pkgsrc/textproc/split-thai/DESCR:1.7 Sun Oct 3 20:40:08 2021
+++ pkgsrc/textproc/split-thai/DESCR Wed Jul 17 17:32:15 2024
@@ -1,8 +1,6 @@
-A collection of utilities to split Thai Unicode UTF-8 text by word
-boundaries, also known as word tokenization or word breaking. The
-utilities use emacs, swath, perl, and a c++ icu-project program. All
-use dictionary-based word splitting.
+An emacs library for handling Thai Unicode UTF-8 words. It can split,
+unsplit, spellcheck, download and play audio for Thai text, practice
+vocabulary, and more.
-Also included is a merged dictionary file of Thai words, a perl script
-to grep Thai UTF-8 words, and an emacs library that can split,
-unsplit, spellcheck, and play audio for Thai words.
+Also included is a merged dictionary file of Thai words gathered from
+the libreoffice, icu, and swath websites.
Index: pkgsrc/textproc/split-thai/PLIST
diff -u pkgsrc/textproc/split-thai/PLIST:1.7 pkgsrc/textproc/split-thai/PLIST:1.8
--- pkgsrc/textproc/split-thai/PLIST:1.7 Sat Mar 20 15:46:23 2021
+++ pkgsrc/textproc/split-thai/PLIST Wed Jul 17 17:32:16 2024
@@ -1,11 +1,5 @@
-@comment $NetBSD: PLIST,v 1.7 2021/03/20 15:46:23 scole Exp $
-bin/st-emacs
-bin/st-icu
-bin/st-swath
-bin/st-wordbreak
-bin/tgrep
+@comment $NetBSD: PLIST,v 1.8 2024/07/17 17:32:16 scole Exp $
share/split-thai/README.txt
share/split-thai/pthai.el
share/split-thai/sampledict.txt
share/split-thai/words
-share/split-thai/words.tri
Index: pkgsrc/textproc/split-thai/Makefile
diff -u pkgsrc/textproc/split-thai/Makefile:1.56 pkgsrc/textproc/split-thai/Makefile:1.57
--- pkgsrc/textproc/split-thai/Makefile:1.56 Mon Jun 3 13:44:14 2024
+++ pkgsrc/textproc/split-thai/Makefile Wed Jul 17 17:32:16 2024
@@ -1,15 +1,14 @@
-# $NetBSD: Makefile,v 1.56 2024/06/03 13:44:14 ryoon Exp $
+# $NetBSD: Makefile,v 1.57 2024/07/17 17:32:16 scole Exp $
-ST_VERSION= 2.28
+ST_VERSION= 2.29
PKGNAME= split-thai-${ST_VERSION}
-PKGREVISION= 1
CATEGORIES= textproc
MAINTAINER= scole%NetBSD.org@localhost
HOMEPAGE= https://ftp.NetBSD.org/pub/pkgsrc/distfiles/LOCAL_PORTS/
-COMMENT= Utilities and an emacs library to split UTF-8 Thai text into words
-# pthai.el, other code, icu dict, swath dict, libreoffice dict
-LICENSE= 2-clause-bsd AND public-domain AND mit AND gnu-gpl-v2 AND gnu-lgpl-v3
+COMMENT= Emacs library to split UTF-8 Thai text into words and more
+# pthai.el, icu dict, swath dict, libreoffice dict
+LICENSE= 2-clause-bsd AND mit AND gnu-gpl-v2 AND gnu-lgpl-v3
GITHUB_ICU_TAG= 929cf40ecbf464bb133113995185c7353f2e106d
LIBREOFFICE_VERSION= 7-6-4
@@ -20,40 +19,24 @@ SITES.split-thai-${ST_VERSION}.tgz= ${MA
SITES.thaidict-${GITHUB_ICU_TAG}.txt= -${MASTER_SITE_GITHUB:=unicode-org/}/icu/raw/${GITHUB_ICU_TAG}/icu4c/source/data/brkitr/dictionaries/thaidict.txt
SITES.th_TH-${LIBREOFFICE_VERSION}.dic= -${LIBREOFFICE_SITE}
-USE_LANGUAGES= c++
-FORCE_CXX_STD= c++17
-
-USE_TOOLS= pkg-config mkdir cp sh:run env awk cat sort uniq grep wc echo
-USE_TOOLS+= perl:run
+USE_TOOLS= mkdir cp env awk cat sort uniq grep wc echo
TOOL_DEPENDS+= libdatrie-[0-9]*:../../devel/libdatrie
+TOOL_DEPENDS+= swath-[0-9]*:../../textproc/swath
DEPENDS+= emacs-[0-9]*:../../editors/emacs
-DEPENDS+= swath-[0-9]*:../../textproc/swath
DEPENDS+= sox-[0-9]*:../../audio/sox
-REPLACE_PERL= st-wordbreak tgrep
-REPLACE_SH= st-swath
-
UTF8_ENV= env LC_ALL=C.UTF-8
ST_SHARE_DIR= share/split-thai
-ST_SHARE_BIN= bin
-INSTALLATION_DIRS= ${ST_SHARE_BIN} ${ST_SHARE_DIR}
+INSTALLATION_DIRS= ${ST_SHARE_DIR}
-ST_SHARE_FILES= README.txt pthai.el sampledict.txt words words.tri
-
-# xxx REPLACE_EMACS_SCRIPT
-SUBST_CLASSES+= st-emacs-app
-SUBST_STAGE.st-emacs-app= pre-configure
-SUBST_MESSAGE.st-emacs-app= Fixing emacs script paths.
-SUBST_FILES.st-emacs-app= st-emacs
-SUBST_SED.st-emacs-app= -e 's,!/bin/emacs,!${PREFIX}/bin/emacs,g'
+ST_SHARE_FILES= README.txt pthai.el sampledict.txt words
SUBST_CLASSES+= dictionary-app
SUBST_STAGE.dictionary-app= pre-configure
SUBST_MESSAGE.dictionary-app= Fixing dictionary paths.
-SUBST_FILES.dictionary-app= st-emacs st-swath st-wordbreak pthai.el
+SUBST_FILES.dictionary-app= README.txt pthai.el
SUBST_SED.dictionary-app= -e 's,ST_SHARE_DIR,${PREFIX}/${ST_SHARE_DIR},g'
-SUBST_SED.dictionary-app+= -e 's,ST_SHARE_BIN,${PREFIX}/${ST_SHARE_BIN},g'
# there are some dictionary entries with '/' and '"' that emacs
# puthash doesn't like. skip them with grep so consoles without utf8
@@ -71,8 +54,6 @@ pre-build:
cd ${WRKSRC} && \
${UTF8_ENV} cat icu-dict swath-dict th_TH.dic thai-word-dict | \
grep -v '[#0123456789/"]' | sort | uniq > words
- cd ${WRKSRC} && \
- ${UTF8_ENV} trietool words add-list -e utf-8 words
.for i in icu-dict thai-word-dict th_TH.dic swath-dict
@${ECHO} `wc -l ${WRKSRC}/${i} | awk '{print $$1}'` words in ${i}
.endfor
@@ -80,17 +61,10 @@ pre-build:
unique words in combined dictionary
do-build:
- cd ${WRKSRC} && \
- ${CXX} ${CPPFLAGS} -o st-icu st-icu.cc \
- `pkg-config --libs --cflags icu-io`
do-install:
- ${INSTALL_SCRIPT} ${WRKSRC}/st-emacs ${WRKSRC}/st-swath \
- ${WRKSRC}/st-wordbreak ${WRKSRC}/tgrep ${DESTDIR}${PREFIX}/bin
- ${INSTALL_PROGRAM} ${WRKSRC}/st-icu ${DESTDIR}${PREFIX}/bin
.for i in ${ST_SHARE_FILES}
${INSTALL_DATA} ${WRKSRC}/${i} ${DESTDIR}${PREFIX}/share/split-thai
.endfor
-.include "../../textproc/icu/buildlink3.mk"
.include "../../mk/bsd.pkg.mk"
Index: pkgsrc/textproc/split-thai/distinfo
diff -u pkgsrc/textproc/split-thai/distinfo:1.33 pkgsrc/textproc/split-thai/distinfo:1.34
--- pkgsrc/textproc/split-thai/distinfo:1.33 Sun Jan 7 03:28:09 2024
+++ pkgsrc/textproc/split-thai/distinfo Wed Jul 17 17:32:16 2024
@@ -1,8 +1,8 @@
-$NetBSD: distinfo,v 1.33 2024/01/07 03:28:09 scole Exp $
+$NetBSD: distinfo,v 1.34 2024/07/17 17:32:16 scole Exp $
-BLAKE2s (split-thai-2.28.tgz) = f8926c4225774cb67180dc4aa09089880d8671c35399a0ed79191fdd65462768
-SHA512 (split-thai-2.28.tgz) = 2872f3dffba2e592493241aae2f6153c6566217d7c3fb070ade7885db176c772d35221b78222df5a0a264a2056205f82e39c91150b06c47ac87d30bf9a89b40b
-Size (split-thai-2.28.tgz) = 41273 bytes
+BLAKE2s (split-thai-2.29.tgz) = 37ed06703ca9fdcd7cda20017d82661075a4f8624772fa7257508292f22493ce
+SHA512 (split-thai-2.29.tgz) = aeb6ec0e6210adf7b509f357856d5571b853a1e1cc5dcdc73c87c50182762028a345844ec301513c86bb600cf40bdf6f2f2face7fe340ecf667f0ec6befc793a
+Size (split-thai-2.29.tgz) = 31649 bytes
BLAKE2s (th_TH-7-6-4.dic) = 976e7d64726682c474f8ae202e5aa94e495a862712e1f6351332d1ff324ef99d
SHA512 (th_TH-7-6-4.dic) = e2280e4a15fea69deaa20868909010342341ad8622337291492e2a4faf84fabc3441f5d34f47ace1d74cf41a21e0b983848ec686f1e4b5adea920f601f2e634b
Size (th_TH-7-6-4.dic) = 1251425 bytes
Home |
Main Index |
Thread Index |
Old Index