Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[pkgsrc/trunk]: pkgsrc/textproc/split-thai Update to version 0.3
details: https://anonhg.NetBSD.org/pkgsrc/rev/2602c57302c0
branches: trunk
changeset: 437064:2602c57302c0
user: scole <scole%pkgsrc.org@localhost>
date: Sat Aug 15 16:52:28 2020 +0000
description:
Update to version 0.3
all changes for emacs splitter:
- load custom dictionary first because 'thai-word-table is a defvar
- add count function and return word counts for a few funcs
- add Lisp wrapper functions split-thai and split-thai-line, which
can split Thai text in an Emacs buffer using 'thai-break-words
diffstat:
textproc/split-thai/Makefile | 4 +-
textproc/split-thai/files/st-emacs | 5 +-
textproc/split-thai/files/thai-utility.el | 70 +++++++++++++++++++++++++-----
3 files changed, 61 insertions(+), 18 deletions(-)
diffs (140 lines):
diff -r de977317d996 -r 2602c57302c0 textproc/split-thai/Makefile
--- a/textproc/split-thai/Makefile Sat Aug 15 12:35:56 2020 +0000
+++ b/textproc/split-thai/Makefile Sat Aug 15 16:52:28 2020 +0000
@@ -1,6 +1,6 @@
-# $NetBSD: Makefile,v 1.2 2020/08/14 17:31:34 scole Exp $
+# $NetBSD: Makefile,v 1.3 2020/08/15 16:52:28 scole Exp $
-PKGNAME= split-thai-0.2
+PKGNAME= split-thai-0.3
CATEGORIES= textproc
MAINTAINER= pkgsrc-users%NetBSD.org@localhost
COMMENT= Utilities to split UTF-8 Thai text into words
diff -r de977317d996 -r 2602c57302c0 textproc/split-thai/files/st-emacs
--- a/textproc/split-thai/files/st-emacs Sat Aug 15 12:35:56 2020 +0000
+++ b/textproc/split-thai/files/st-emacs Sat Aug 15 16:52:28 2020 +0000
@@ -8,11 +8,10 @@
;;
;;(toggle-debug-on-error) ;; debug
-(require 'thai-word)
-;; load custom dictionary
+;; load custom dictionary first, 'thai-word-table is defvar
+(load "ST_SHARE_DIR/thai-dict" nil t)
(load "ST_SHARE_DIR/thai-utility" nil t)
-(load "ST_SHARE_DIR/thai-dict" nil t)
;; split a thai line by spaces, return new line
(defun process-thai-line(line)
diff -r de977317d996 -r 2602c57302c0 textproc/split-thai/files/thai-utility.el
--- a/textproc/split-thai/files/thai-utility.el Sat Aug 15 12:35:56 2020 +0000
+++ b/textproc/split-thai/files/thai-utility.el Sat Aug 15 16:52:28 2020 +0000
@@ -44,10 +44,12 @@
(defun thai-word-table-save(filename &optional alist)
"save thai words extracted from a nested-alist table to
filename in utf8 format, one word per line. default is to save
-'thai-word-table if no alist argument given."
- (interactive)
+'thai-word-table if no alist argument given. Returns number of
+dictionary words."
+ (interactive "FName of file to save to: \nP")
(let ((thaiwords)
(elem)
+ (line_count)
(coding-system-for-read 'utf-8)
(coding-system-for-write 'utf-8)
(buffer-file-coding-system 'utf-8))
@@ -72,8 +74,29 @@
(insert elem "\n")))
(sort-lines nil (point-min) (point-max))
+ (setq line_count (count-lines (point-min) (point-max)))
(write-region nil nil filename)
- (buffer-string))))
+ line_count)))
+
+(defun count-words-nested-alist (&optional alist)
+ "Count number of words in a nested alist. if no arg given,
+count 'thai-word-table words"
+ (interactive)
+ (let ((count 0)
+ (elem)
+ (thaiwords))
+ ;; default list or not
+ (setq alist (or alist thai-word-table))
+ (or (nested-alist-p alist)
+ (error "Invalid argument %s" alist))
+ ;; remove 'thai-words from 'thai-word-table
+ (setq alist (cdr alist))
+ (while (setq elem (car alist))
+ (setq alist (cdr alist))
+ (setq thaiwords (extract-thai-na elem ""))
+ (setq count (+ count (length thaiwords))))
+ (message "%d words in nested alist" count)
+ count))
;; 'thai-tis620 is default for emacs <= 28
(defun thai-update-word-table-utf8 (file &optional append)
@@ -99,25 +122,32 @@
(defun thai-word-table-save-defvar(dictfile lispfile)
"read a utf8 thai dictionary file and save to a lisp file
suitable for initializing the 'thai-word-table as a \"defvar\".
-Overwrites the lisp file if it exists."
+Overwrites the lisp file if it exists. Returns count of
+dictionary words."
(interactive)
(let ((header)
(footer)
(elem)
+ (line_count)
(coding-system-for-read 'utf-8)
(coding-system-for-write 'utf-8)
(buffer-file-coding-system 'utf-8))
- (setq header (list "(defvar thai-word-table"
- "(let ((table (list 'thai-words)))"
- "(dolist (elt"
- "'(" ))
- (setq footer (list "))"
- "(set-nested-alist elt 1 table))"
- "table)"
- "\"Nested alist of Thai words.\")" ))
+ (setq header (list
+ ";; file auto-generated from thai-word-table-save-defvar"
+ ""
+ "(defvar thai-word-table"
+ "(let ((table (list 'thai-words)))"
+ "(dolist (elt"
+ "'(" ))
+ (setq footer (list
+ "))"
+ "(set-nested-alist elt 1 table))"
+ "table)"
+ "\"Nested alist of Thai words.\")" ))
(with-temp-buffer
(insert-file-contents dictfile)
(goto-char (point-min))
+ (setq line_count (count-lines (point-min) (point-max)))
;; quote each thai word
(while (not (eobp))
(beginning-of-line)
@@ -135,4 +165,18 @@
(insert elem "\n"))
(lisp-mode)
(indent-region (point-min) (point-max))
- (write-region nil nil lispfile))))
+ (write-region nil nil lispfile))
+ line_count))
+
+(defun split-thai-line(&optional separator)
+ "Break Thai words from point to end of line by inserting a
+separator string at word boundaries. (wrapper for 'thai-break-words)"
+ (interactive)
+ (thai-break-words (or separator " ") (line-end-position)))
+
+(defun split-thai(&optional separator)
+ "Break Thai words from point to end of buffer by inserting a
+separator string at word boundaries. (wrapper for
+'thai-break-words)"
+ (interactive)
+ (thai-break-words (or separator " ") (point-max)))
Home |
Main Index |
Thread Index |
Old Index