Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[pkgsrc/trunk]: pkgsrc/textproc/split-thai Update to 0.7
details: https://anonhg.NetBSD.org/pkgsrc/rev/797cacdc4483
branches: trunk
changeset: 437309:797cacdc4483
user: scole <scole%pkgsrc.org@localhost>
date: Thu Aug 20 14:20:27 2020 +0000
description:
Update to 0.7
- st-icu: don't skip strings containing ASCII digits [0-9]
diffstat:
textproc/split-thai/Makefile | 4 ++--
textproc/split-thai/files/st-icu.cc | 7 ++++---
2 files changed, 6 insertions(+), 5 deletions(-)
diffs (51 lines):
diff -r 2bf616f39023 -r 797cacdc4483 textproc/split-thai/Makefile
--- a/textproc/split-thai/Makefile Thu Aug 20 14:01:48 2020 +0000
+++ b/textproc/split-thai/Makefile Thu Aug 20 14:20:27 2020 +0000
@@ -1,6 +1,6 @@
-# $NetBSD: Makefile,v 1.6 2020/08/19 16:52:25 scole Exp $
+# $NetBSD: Makefile,v 1.7 2020/08/20 14:20:27 scole Exp $
-PKGNAME= split-thai-0.6
+PKGNAME= split-thai-0.7
CATEGORIES= textproc
MAINTAINER= pkgsrc-users%NetBSD.org@localhost
COMMENT= Utilities to split UTF-8 Thai text into words
diff -r 2bf616f39023 -r 797cacdc4483 textproc/split-thai/files/st-icu.cc
--- a/textproc/split-thai/files/st-icu.cc Thu Aug 20 14:01:48 2020 +0000
+++ b/textproc/split-thai/files/st-icu.cc Thu Aug 20 14:20:27 2020 +0000
@@ -7,8 +7,8 @@
#include <unicode/brkiter.h>
#include <unicode/regex.h>
#include <unicode/ucnv.h>
+#include <unicode/ustdio.h>
#include <unicode/ustream.h>
-#include <unicode/ustdio.h>
using namespace std;
using namespace icu;
@@ -18,6 +18,7 @@
const UnicodeString thai_rexp = "[\\u0e00-\\u0e7f]+";
const UnicodeString thai_consonant = "[\\u0e01-\\u0e2e]+";
const UnicodeString thai_num_rexp = "[\\u0e50-\\u0e59]+";
+const UnicodeString number_rexp = "[0-9\\u0e50-\\u0e59]+";
const UnicodeString thai_nonnum_rexp = "[\\u0e01-\\u0e4f\\u0e5a-\\u0e7f]+";
void usage() {
@@ -56,7 +57,7 @@
// add spaces to string with thai numbers
UnicodeString space_thai_numbers(const UnicodeString &s) {
// return string unmodified if no numbers
- if ( ! matches_regexp(s, thai_num_rexp) ) {
+ if ( ! matches_regexp(s, number_rexp) ) {
return s;
}
@@ -105,7 +106,7 @@
}
// only one word found, trim and done
- if ( vbreak.size() == 1 ) {
+ if ( vbreak.size() <= 1 ) {
UnicodeString ss(s);
return ss.trim();
}
Home | Main Index | Thread Index | Old Index