Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[pkgsrc/trunk]: pkgsrc/textproc/split-thai Update to 0.5
details: https://anonhg.NetBSD.org/pkgsrc/rev/1317708f5891
branches: trunk
changeset: 437206:1317708f5891
user: scole <scole%pkgsrc.org@localhost>
date: Tue Aug 18 17:10:28 2020 +0000
description:
Update to 0.5
- clean up some comments and simplify lisp code a bit
diffstat:
textproc/split-thai/Makefile | 4 ++--
textproc/split-thai/files/thai-utility.el | 31 ++++++++++++-------------------
2 files changed, 14 insertions(+), 21 deletions(-)
diffs (91 lines):
diff -r 35614c88b960 -r 1317708f5891 textproc/split-thai/Makefile
--- a/textproc/split-thai/Makefile Tue Aug 18 14:28:27 2020 +0000
+++ b/textproc/split-thai/Makefile Tue Aug 18 17:10:28 2020 +0000
@@ -1,6 +1,6 @@
-# $NetBSD: Makefile,v 1.4 2020/08/17 17:43:15 scole Exp $
+# $NetBSD: Makefile,v 1.5 2020/08/18 17:10:28 scole Exp $
-PKGNAME= split-thai-0.4
+PKGNAME= split-thai-0.5
CATEGORIES= textproc
MAINTAINER= pkgsrc-users%NetBSD.org@localhost
COMMENT= Utilities to split UTF-8 Thai text into words
diff -r 35614c88b960 -r 1317708f5891 textproc/split-thai/files/thai-utility.el
--- a/textproc/split-thai/files/thai-utility.el Tue Aug 18 14:28:27 2020 +0000
+++ b/textproc/split-thai/files/thai-utility.el Tue Aug 18 17:10:28 2020 +0000
@@ -98,13 +98,13 @@
(message "%d words in nested alist" count)
count))
-;; 'thai-tis620 is default for emacs <= 28
(defun thai-update-word-table-utf8 (file &optional append)
"Update Thai word table by replacing the current word list with
-FILE, which is in utf-8. If called with a prefix argument, FILE
-is appended instead to the current word list. Does the same as
+FILE, which is utf-8. If called with a prefix argument, FILE is
+appended instead to the current word list. Does the same as
'thai-update-word-table, except that function expects
-'thai-tis620 encoding"
+'thai-tis620 encoding which appears to be the default format for
+at least emacs version <= 28"
(interactive "FThai word table file: \nP")
(let* ((coding-system-for-read 'utf-8)
(coding-system-for-write 'utf-8)
@@ -146,23 +146,19 @@
"\"Nested alist of Thai words.\")" ))
(with-temp-buffer
(insert-file-contents dictfile)
+ ;; quote each thai word
(goto-char (point-min))
+ (while (search-forward-regexp "\\ct+" nil t)
+ (replace-match (concat "\"" (match-string 0) "\"")))
(setq line_count (count-lines (point-min) (point-max)))
- ;; quote each thai word
- (while (not (eobp))
- (beginning-of-line)
- (insert "\"")
- (end-of-line)
- (insert "\"")
- (forward-line 1))
-
+ ;; insert lisp code for defvar
(goto-char (point-min))
(dolist (elem header)
(insert elem "\n"))
-
(goto-char (point-max))
(dolist (elem footer)
(insert elem "\n"))
+ ;; indent for lisp and save
(lisp-mode)
(indent-region (point-min) (point-max))
(write-region nil nil lispfile))
@@ -170,15 +166,14 @@
(defun split-thai-line()
"Break Thai words from point to end of line by inserting a
-separator string at word boundaries. (wrapper for 'thai-break-words)"
+space at word boundaries. (wrapper for 'thai-break-words)"
(interactive)
(thai-break-words " " (line-end-position))
(split-thai-numbers (point) (line-end-position)))
(defun split-thai()
"Break Thai words from point to end of buffer by inserting a
-separator string at word boundaries. (wrapper for
-'thai-break-words)"
+space at word boundaries. (wrapper for 'thai-break-words)"
(interactive)
(thai-break-words " " (point-max))
(split-thai-numbers (point) (point-max)))
@@ -188,9 +183,7 @@
'thai-break-words doesn't always split numbers properly. this may
improve tokenization somewhat."
;; xxx this really should be fixed in 'thai-word lib
- (let* (
- ;; "\\([๐๑๒๓๔๕๖๗๘๙0123456789]+\\)"
- (num_rexp "\\([\u0e50-\u0e59]+\\)") ;; thai numbers
+ (let* ((num_rexp "\\([\u0e50-\u0e59]+\\)") ;; thai numbers
(nonnum_rexp "\\([\u0e00-\u0e4f\u0e5a-\u0e7f]\\)") ;; "non-numbers"
(trailing_rexp (concat num_rexp nonnum_rexp))
(leading_rexp (concat nonnum_rexp num_rexp)))
Home | Main Index | Thread Index | Old Index