pkgsrc-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[pkgsrc/trunk]: pkgsrc/textproc Restore split-thai 2.0 package after moving source from pkgsrc to MASTER_SITE_LOCAL
details: https://anonhg.NetBSD.org/pkgsrc/rev/a51927454785
branches: trunk
changeset: 449072:a51927454785
user: scole <scole%pkgsrc.org@localhost>
date: Sat Mar 20 15:46:23 2021 +0000
description:
Restore split-thai 2.0 package after moving source from pkgsrc to MASTER_SITE_LOCAL
diffstat:
textproc/Makefile | 3 +-
textproc/split-thai/DESCR | 8 ++++
textproc/split-thai/Makefile | 88 ++++++++++++++++++++++++++++++++++++++++++++
textproc/split-thai/PLIST | 11 +++++
textproc/split-thai/distinfo | 10 +++++
5 files changed, 119 insertions(+), 1 deletions(-)
diffs (150 lines):
diff -r 775fc2b9a99c -r a51927454785 textproc/Makefile
--- a/textproc/Makefile Sat Mar 20 15:29:39 2021 +0000
+++ b/textproc/Makefile Sat Mar 20 15:46:23 2021 +0000
@@ -1,4 +1,4 @@
-# $NetBSD: Makefile,v 1.1203 2021/03/19 22:18:13 markd Exp $
+# $NetBSD: Makefile,v 1.1204 2021/03/20 15:46:23 scole Exp $
#
COMMENT= Text processing utilities (does not include desktop publishing)
@@ -1116,6 +1116,7 @@
SUBDIR+= sord
SUBDIR+= source-highlight
SUBDIR+= sphinxsearch
+SUBDIR+= split-thai
SUBDIR+= stardic
SUBDIR+= stava
SUBDIR+= sub2srt
diff -r 775fc2b9a99c -r a51927454785 textproc/split-thai/DESCR
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/textproc/split-thai/DESCR Sat Mar 20 15:46:23 2021 +0000
@@ -0,0 +1,8 @@
+A collection of utilities to split Thai Unicode UTF-8 text by word
+boundaries, also known as word tokenization or word breaking. The
+utilities use Emacs, swath, Perl, and a C++ ICU-project program. All
+use dictionary-based word splitting.
+
+Also included are a merged dictionary file of Thai words, a Perl script
+to grep Thai UTF-8 words, and an Emacs library that can split and play
+audio for Thai words.
diff -r 775fc2b9a99c -r a51927454785 textproc/split-thai/Makefile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/textproc/split-thai/Makefile Sat Mar 20 15:46:23 2021 +0000
@@ -0,0 +1,88 @@
+# $NetBSD: Makefile,v 1.16 2021/03/20 15:46:23 scole Exp $
+
+ST_VERSION= 2.0
+PKGNAME= split-thai-${ST_VERSION}
+PKGREVISION= 1
+CATEGORIES= textproc
+
+MAINTAINER= scole%NetBSD.org@localhost
+HOMEPAGE= https://ftp.NetBSD.org/pub/pkgsrc/distfiles/LOCAL_PORTS/
+COMMENT= Utilities and an emacs library to split UTF-8 Thai text into words
+# pthai.el, other code, icu dict, swath dict
+LICENSE= 2-clause-bsd AND public-domain AND mit AND gnu-gpl-v2
+
+GITHUB_ICU_TAG= 61607c27732906d36c5bd4d23ecc092f89f53a2b
+DISTFILES= split-thai-${ST_VERSION}.tgz thaidict-${GITHUB_ICU_TAG}.txt
+# Per-distfile fetch sites; a leading "-" marks a complete URL used verbatim.
+SITES.split-thai-${ST_VERSION}.tgz= ${MASTER_SITE_LOCAL}
+SITES.thaidict-${GITHUB_ICU_TAG}.txt= -${MASTER_SITE_GITHUB:=unicode-org/}icu/raw/${GITHUB_ICU_TAG}/icu4c/source/data/brkitr/dictionaries/thaidict.txt
+
+USE_LANGUAGES= c++11 # darwin needed 11?
+
+USE_TOOLS= pkg-config mkdir cp sh:run env awk cat sort uniq grep wc echo
+USE_TOOLS+= perl:run
+BUILD_DEPENDS+= libdatrie-[0-9]*:../../devel/libdatrie
+DEPENDS+= emacs-[0-9]*:../../editors/emacs
+DEPENDS+= swath-[0-9]*:../../textproc/swath
+DEPENDS+= mpg123-[0-9]*:../../audio/mpg123
+
+REPLACE_PERL= st-wordbreak tgrep
+REPLACE_SH= st-swath
+
+UTF8_ENV= env LC_ALL=C.UTF-8
+
+ST_SHARE_DIR= share/split-thai
+ST_SHARE_BIN= bin
+INSTALLATION_DIRS= ${ST_SHARE_BIN} ${ST_SHARE_DIR}
+
+ST_SHARE_FILES= README.txt pthai.el sampledict.txt words words.tri
+
+# XXX hand-rolled REPLACE_EMACS_SCRIPT: point st-emacs at ${PREFIX}/bin/emacs
+SUBST_CLASSES+= st-emacs-app
+SUBST_STAGE.st-emacs-app= pre-configure
+SUBST_MESSAGE.st-emacs-app= Fixing emacs script paths.
+SUBST_FILES.st-emacs-app= st-emacs
+SUBST_SED.st-emacs-app= -e 's,!/bin/emacs,!${PREFIX}/bin/emacs,g'
+
+SUBST_CLASSES+= dictionary-app
+SUBST_STAGE.dictionary-app= post-extract
+SUBST_MESSAGE.dictionary-app= Fixing dictionary paths.
+SUBST_FILES.dictionary-app= st-emacs st-swath st-wordbreak pthai.el
+SUBST_SED.dictionary-app= -e 's,ST_SHARE_DIR,${PREFIX}/${ST_SHARE_DIR},g'
+SUBST_SED.dictionary-app+= -e 's,ST_SHARE_BIN,${PREFIX}/${ST_SHARE_BIN},g'
+
+pre-build: # assemble the merged word list "words" from three dictionaries
+ cd ${WRKSRC} && ${UTF8_ENV} emacs --batch \
+ --eval='(setq pthai-bootstrap t)' \
+ --eval='(load-file "pthai.el")' \
+ --eval='(pthai-twt-table-save "thai-word-dict")' # presumably dumps pthai.el's word table
+ cp ${WRKDIR}/thaidict-${GITHUB_ICU_TAG}.txt ${WRKSRC}/icu-dict # the fetched thaidict distfile
+ cd ${PREFIX}/share/swath && \
+ ${UTF8_ENV} trietool swathdic list | \
+ awk '{print $$1}' > ${WRKSRC}/swath-dict # keep only the first field of each listed entry
+ cd ${WRKSRC} && \
+ ${UTF8_ENV} cat icu-dict swath-dict thai-word-dict | \
+ grep -v '#' | sort | uniq > words # drop lines containing "#", then de-duplicate
+ cd ${WRKSRC} && \
+ ${UTF8_ENV} trietool words add-list -e utf-8 words # presumably produces words.tri -- TODO confirm
+.for i in thai-word-dict icu-dict swath-dict
+ @${ECHO} `wc -l ${WRKSRC}/${i} | awk '{print $$1}'` words in ${i}
+.endfor
+ @${ECHO} `wc -l ${WRKSRC}/words | awk '{print $$1}'` \
+ unique words in combined dictionary
+
+do-build: # compile the ICU-based splitter; ICU flags come from pkg-config
+	cd ${WRKSRC} && \
+	${CXX} ${CPPFLAGS} ${CXXFLAGS} ${LDFLAGS} -o st-icu st-icu.cc \
+	`pkg-config --libs --cflags icu-io`
+
+do-install: # destinations mirror INSTALLATION_DIRS (ST_SHARE_BIN, ST_SHARE_DIR)
+	${INSTALL_SCRIPT} ${WRKSRC}/st-emacs ${WRKSRC}/st-swath \
+	${WRKSRC}/st-wordbreak ${WRKSRC}/tgrep ${DESTDIR}${PREFIX}/${ST_SHARE_BIN}
+	${INSTALL_PROGRAM} ${WRKSRC}/st-icu ${DESTDIR}${PREFIX}/${ST_SHARE_BIN}
+.for i in ${ST_SHARE_FILES}
+	${INSTALL_DATA} ${WRKSRC}/${i} ${DESTDIR}${PREFIX}/${ST_SHARE_DIR}
+.endfor
+
+.include "../../textproc/icu/buildlink3.mk"
+.include "../../mk/bsd.pkg.mk"
diff -r 775fc2b9a99c -r a51927454785 textproc/split-thai/PLIST
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/textproc/split-thai/PLIST Sat Mar 20 15:46:23 2021 +0000
@@ -0,0 +1,11 @@
+@comment $NetBSD: PLIST,v 1.7 2021/03/20 15:46:23 scole Exp $
+bin/st-emacs
+bin/st-icu
+bin/st-swath
+bin/st-wordbreak
+bin/tgrep
+share/split-thai/README.txt
+share/split-thai/pthai.el
+share/split-thai/sampledict.txt
+share/split-thai/words
+share/split-thai/words.tri
diff -r 775fc2b9a99c -r a51927454785 textproc/split-thai/distinfo
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/textproc/split-thai/distinfo Sat Mar 20 15:46:23 2021 +0000
@@ -0,0 +1,10 @@
+$NetBSD: distinfo,v 1.3 2021/03/20 15:46:23 scole Exp $
+
+SHA1 (split-thai-2.0.tgz) = 80eb473e2038d889f12fd684388f88017fdec2d2
+RMD160 (split-thai-2.0.tgz) = 2280da7813940dc9eee1a2680425ba7366901566
+SHA512 (split-thai-2.0.tgz) = a9e0a101718857b8b3817918ff3cb4e9e0b6436a0df5cf0d2871ed5afd94f635cf07b1dba624c75aea1ce98da3a4e4403077f518683b92800aa383d71aa53829
+Size (split-thai-2.0.tgz) = 28541 bytes
+SHA1 (thaidict-61607c27732906d36c5bd4d23ecc092f89f53a2b.txt) = 2a2ad127cc279835cb4df04eb69401a0d4927774
+RMD160 (thaidict-61607c27732906d36c5bd4d23ecc092f89f53a2b.txt) = 0a6df7b7dd6ef502c5dd20020e37b2ca1a5514a2
+SHA512 (thaidict-61607c27732906d36c5bd4d23ecc092f89f53a2b.txt) = 88800fe2a453fc40f16ff54c21c852a8ea8e1496e42d5d187e5b5ac0ff58050830fc0816239e4f88cb23ed301f894d1ca52eb4676fd85c13c285cec815ae7c42
+Size (thaidict-61607c27732906d36c5bd4d23ecc092f89f53a2b.txt) = 493044 bytes
Home |
Main Index |
Thread Index |
Old Index