pkgsrc-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[pkgsrc/trunk]: pkgsrc/textproc/split-thai Update to 2.7



details:   https://anonhg.NetBSD.org/pkgsrc/rev/69c84d242f64
branches:  trunk
changeset: 457168:69c84d242f64
user:      scole <scole%pkgsrc.org@localhost>
date:      Thu Aug 19 20:42:26 2021 +0000

description:
Update to 2.7
all:
- added libreoffice thai dictionary words
pthai.el only:
- fix pthai-twt-* to handle larger dictionary
- add more thing-at-point functionality with forward-thaiword/backward-thaiword
- use pulse instead of region to highlight words when playing
- major code cleanups
- split up audio word downloads into subdirectories

diffstat:

 textproc/split-thai/Makefile |  25 ++++++++++++++++---------
 textproc/split-thai/distinfo |  14 +++++++++-----
 2 files changed, 25 insertions(+), 14 deletions(-)

diffs (84 lines):

diff -r 605015e43cae -r 69c84d242f64 textproc/split-thai/Makefile
--- a/textproc/split-thai/Makefile      Thu Aug 19 19:21:00 2021 +0000
+++ b/textproc/split-thai/Makefile      Thu Aug 19 20:42:26 2021 +0000
@@ -1,20 +1,23 @@
-# $NetBSD: Makefile,v 1.25 2021/07/28 17:45:59 scole Exp $
+# $NetBSD: Makefile,v 1.26 2021/08/19 20:42:26 scole Exp $
 
-ST_VERSION=    2.6
+ST_VERSION=    2.7
 PKGNAME=       split-thai-${ST_VERSION}
 CATEGORIES=    textproc
 
 MAINTAINER=    scole%NetBSD.org@localhost
 HOMEPAGE=      https://ftp.NetBSD.org/pub/pkgsrc/distfiles/LOCAL_PORTS/
 COMMENT=       Utilities and an emacs library to split UTF-8 Thai text into words
-# pthai.el, other code, icu dict, swath dict
-LICENSE=       2-clause-bsd AND public-domain AND mit AND gnu-gpl-v2
+# pthai.el, other code, icu dict, swath dict, libreoffice dict
+LICENSE=       2-clause-bsd AND public-domain AND mit AND gnu-gpl-v2 AND gnu-lgpl-v3
 
-GITHUB_ICU_TAG=        61607c27732906d36c5bd4d23ecc092f89f53a2b
-DISTFILES=     split-thai-${ST_VERSION}.tgz thaidict-${GITHUB_ICU_TAG}.txt
+GITHUB_ICU_TAG=                61607c27732906d36c5bd4d23ecc092f89f53a2b
+LIBREOFFICE_VERSION=   7-2-0
+LIBREOFFICE_SITE=      https://cgit.freedesktop.org/libreoffice/dictionaries/plain/th_TH/th_TH.dic?h=libreoffice-${LIBREOFFICE_VERSION}
+DISTFILES=             split-thai-${ST_VERSION}.tgz thaidict-${GITHUB_ICU_TAG}.txt th_TH-${LIBREOFFICE_VERSION}.dic
 
 SITES.split-thai-${ST_VERSION}.tgz=    ${MASTER_SITE_LOCAL}
 SITES.thaidict-${GITHUB_ICU_TAG}.txt=  -${MASTER_SITE_GITHUB:=unicode-org/}/icu/raw/${GITHUB_ICU_TAG}/icu4c/source/data/brkitr/dictionaries/thaidict.txt
+SITES.th_TH-${LIBREOFFICE_VERSION}.dic=        -${LIBREOFFICE_SITE}
 
 # darwin needed 11 to compile icu program?
 USE_LANGUAGES= c++11
@@ -51,21 +54,25 @@
 SUBST_SED.dictionary-app=      -e 's,ST_SHARE_DIR,${PREFIX}/${ST_SHARE_DIR},g'
 SUBST_SED.dictionary-app+=     -e 's,ST_SHARE_BIN,${PREFIX}/${ST_SHARE_BIN},g'
 
+# Some dictionary entries contain '/' or '"', which emacs puthash
+# doesn't like.  Skip them with grep so that the error printed when an
+# entry fails to be added cannot garble consoles lacking UTF-8 support.
 pre-build:
        cd ${WRKSRC} && ${UTF8_ENV} emacs --batch \
                --eval='(setq pthai-bootstrap t)' \
                --eval='(load-file "pthai.el")' \
                --eval='(pthai-twt-save "thai-word-dict")'
        cp ${WRKDIR}/thaidict-${GITHUB_ICU_TAG}.txt ${WRKSRC}/icu-dict
+       cp ${WRKDIR}/th_TH-${LIBREOFFICE_VERSION}.dic ${WRKSRC}/th_TH.dic
        cd ${PREFIX}/share/swath && \
                ${UTF8_ENV} trietool swathdic list | \
                awk '{print $$1}' > ${WRKSRC}/swath-dict
        cd ${WRKSRC} && \
-               ${UTF8_ENV} cat icu-dict swath-dict thai-word-dict | \
-                       grep -v '#' | sort | uniq > words
+               ${UTF8_ENV} cat icu-dict swath-dict th_TH.dic thai-word-dict | \
+                       grep -v '[#0123456789/"]' | sort | uniq > words
        cd ${WRKSRC} && \
                ${UTF8_ENV} trietool words add-list -e utf-8 words
-.for i in thai-word-dict icu-dict swath-dict
+.for i in icu-dict thai-word-dict th_TH.dic swath-dict
        @${ECHO} `wc -l ${WRKSRC}/${i} | awk '{print $$1}'` words in ${i}
 .endfor
        @${ECHO} `wc -l ${WRKSRC}/words | awk '{print $$1}'` \
diff -r 605015e43cae -r 69c84d242f64 textproc/split-thai/distinfo
--- a/textproc/split-thai/distinfo      Thu Aug 19 19:21:00 2021 +0000
+++ b/textproc/split-thai/distinfo      Thu Aug 19 20:42:26 2021 +0000
@@ -1,9 +1,13 @@
-$NetBSD: distinfo,v 1.9 2021/07/28 17:45:59 scole Exp $
+$NetBSD: distinfo,v 1.10 2021/08/19 20:42:26 scole Exp $
 
-SHA1 (split-thai-2.6.tgz) = 3c75fba9769ed201856ba1d3dd575ce749ed937f
-RMD160 (split-thai-2.6.tgz) = fbbedcccf723fb4e67506f281f59e3360b5a060f
-SHA512 (split-thai-2.6.tgz) = 13a013778f39425d13780151501066c2ecf4df37b4b09d8ed82eae1bf042cab68bd9e8d7044499dbdd960571b5c14d1a8aeeef000e918f6e627550f0cad46d35
-Size (split-thai-2.6.tgz) = 30546 bytes
+SHA1 (split-thai-2.7.tgz) = a5c1247b29b1661ddf4fb83217ae08abb21889ba
+RMD160 (split-thai-2.7.tgz) = 667deee03fb041703f833f436a4801594d3e456d
+SHA512 (split-thai-2.7.tgz) = eb64dcb843b537f442a1cb7ef1d20ff0b3418ab5e23ca6a355fc3b2becaef9df0d2e93eae53990bea24b2d38167fa0d884edd6d4fc80c1e5c0bda2e90666e02e
+Size (split-thai-2.7.tgz) = 30211 bytes
+SHA1 (th_TH-7-2-0.dic) = 5ac048444e7dee7c698a17bf9c62c9a9ded2a6d9
+RMD160 (th_TH-7-2-0.dic) = aae7d327b30dd124213d3cb03c022429adee698c
+SHA512 (th_TH-7-2-0.dic) = e2280e4a15fea69deaa20868909010342341ad8622337291492e2a4faf84fabc3441f5d34f47ace1d74cf41a21e0b983848ec686f1e4b5adea920f601f2e634b
+Size (th_TH-7-2-0.dic) = 1251425 bytes
 SHA1 (thaidict-61607c27732906d36c5bd4d23ecc092f89f53a2b.txt) = 2a2ad127cc279835cb4df04eb69401a0d4927774
 RMD160 (thaidict-61607c27732906d36c5bd4d23ecc092f89f53a2b.txt) = 0a6df7b7dd6ef502c5dd20020e37b2ca1a5514a2
 SHA512 (thaidict-61607c27732906d36c5bd4d23ecc092f89f53a2b.txt) = 88800fe2a453fc40f16ff54c21c852a8ea8e1496e42d5d187e5b5ac0ff58050830fc0816239e4f88cb23ed301f894d1ca52eb4676fd85c13c285cec815ae7c42



Home | Main Index | Thread Index | Old Index