tech-userlevel archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[Patch] Switch nvi from bundled regex to tre



By default, nvi uses its bundled regex, which handles wchar_t strings.
However, it is still buggy for wide chars. On the other hand, tre also
provides wchar_t versions of the regex routines, which are "much better"
as far as I can see. For example, bracket expressions work fine at least
for the ja_JP.UTF-8 and ja_JP.eucJP locales, whereas they do not with the
nvi-bundled regex. I therefore propose to switch nvi from the bundled
regex to tre. For this purpose, I propose to:

- Install the headers from tre into /usr/include/tre; they may also be
  useful for third-party software.
  (Alternatively: do not install them, and have nvi use them directly
  from external/bsd/tre/somewhere.)

- Build /rescue/vi (and other crunched-binary versions of nvi in
  miniroot) with USE_WIDECHAR == "no" so that the extra library is not
  linked into them. This is not a real loss: they cannot handle wide
  chars even if they are built with USE_WIDECHAR == "yes", because the
  locale support in libc does not work for statically-linked
  binaries.

Any comments or suggestions?

rin
Index: distrib/sets/lists/base/mi
===================================================================
RCS file: /cvsroot/src/distrib/sets/lists/base/mi,v
retrieving revision 1.1164
diff -u -r1.1164 mi
--- distrib/sets/lists/base/mi	24 Oct 2017 02:22:09 -0000	1.1164
+++ distrib/sets/lists/base/mi	12 Nov 2017 12:09:33 -0000
@@ -1219,6 +1219,7 @@
 ./usr/include/ss				base-obsolete		obsolete
 ./usr/include/ssp				base-c-usr
 ./usr/include/sys				base-c-usr
+./usr/include/tre				base-c-usr
 ./usr/include/trousers				base-c-usr
 ./usr/include/tss				base-c-usr
 ./usr/include/ufs				base-c-usr
Index: distrib/sets/lists/comp/mi
===================================================================
RCS file: /cvsroot/src/distrib/sets/lists/comp/mi,v
retrieving revision 1.2159
diff -u -r1.2159 mi
--- distrib/sets/lists/comp/mi	7 Nov 2017 22:20:05 -0000	1.2159
+++ distrib/sets/lists/comp/mi	12 Nov 2017 12:09:46 -0000
@@ -3024,6 +3024,9 @@
 ./usr/include/termios.h				comp-c-include
 ./usr/include/threadlib.h			comp-obsolete		obsolete
 ./usr/include/time.h				comp-c-include
+./usr/include/tre/regex.h			comp-c-include
+./usr/include/tre/tre-config.h			comp-c-include
+./usr/include/tre/tre.h				comp-c-include
 ./usr/include/trousers/trousers.h		comp-c-include		tpm
 ./usr/include/trousers/tss.h			comp-c-include		tpm
 ./usr/include/tss/compat11b.h			comp-c-include		tpm
Index: etc/mtree/NetBSD.dist.base
===================================================================
RCS file: /cvsroot/src/etc/mtree/NetBSD.dist.base,v
retrieving revision 1.163
diff -u -r1.163 NetBSD.dist.base
--- etc/mtree/NetBSD.dist.base	21 Oct 2017 05:30:48 -0000	1.163
+++ etc/mtree/NetBSD.dist.base	12 Nov 2017 12:09:46 -0000
@@ -235,6 +235,7 @@
 ./usr/include/security
 ./usr/include/ssp
 ./usr/include/sys
+./usr/include/tre
 ./usr/include/trousers
 ./usr/include/tss
 ./usr/include/ufs
Index: external/bsd/nvi/dist/common/multibyte.h
===================================================================
RCS file: /home/netbsd/src/external/bsd/nvi/dist/common/multibyte.h,v
retrieving revision 1.4
diff -u -r1.4 multibyte.h
--- external/bsd/nvi/dist/common/multibyte.h	13 Nov 2017 01:34:59 -0000	1.4
+++ external/bsd/nvi/dist/common/multibyte.h	13 Nov 2017 02:42:49 -0000
@@ -113,4 +113,12 @@
 	((void *)((char *)MEMCPY(p, t, len) + (len) * sizeof(*(p))))
 #define SIZE(w)		(sizeof(w)/sizeof(*w))
 
+#if defined(USE_WIDECHAR) && defined(HAVE_TRE)
+#define	REGCOMP	regwcomp
+#define	REGEXEC	regwexec
+#else
+#define	REGCOMP	regcomp
+#define	REGEXEC	regexec
+#endif
+
 #endif
Index: external/bsd/nvi/dist/common/search.c
===================================================================
RCS file: /home/netbsd/src/external/bsd/nvi/dist/common/search.c,v
retrieving revision 1.3
diff -u -r1.3 search.c
--- external/bsd/nvi/dist/common/search.c	26 Jan 2014 21:43:45 -0000	1.3
+++ external/bsd/nvi/dist/common/search.c	13 Nov 2017 02:42:49 -0000
@@ -237,7 +237,7 @@
 		    lno, coff, len != 0 ? len - 1 : len);
 #endif
 		/* Search the line. */
-		eval = regexec(&sp->re_c, l, 1, match,
+		eval = REGEXEC(&sp->re_c, l, 1, match,
 		    (match[0].rm_so == 0 ? 0 : REG_NOTBOL) | REG_STARTEND);
 		if (eval == REG_NOMATCH)
 			continue;
@@ -374,7 +374,7 @@
 		    "B search: %lu from 0 to %qu\n", lno, match[0].rm_eo);
 #endif
 		/* Search the line. */
-		eval = regexec(&sp->re_c, l, 1, match,
+		eval = REGEXEC(&sp->re_c, l, 1, match,
 		    ((size_t)match[0].rm_eo == len ? 0 : REG_NOTEOL) | REG_STARTEND);
 		if (eval == REG_NOMATCH)
 			continue;
@@ -409,7 +409,7 @@
 			if ((size_t)match[0].rm_so >= len)
 				break;
 			match[0].rm_eo = len;
-			eval = regexec(&sp->re_c, l, 1, match,
+			eval = REGEXEC(&sp->re_c, l, 1, match,
 			    (match[0].rm_so == 0 ? 0 : REG_NOTBOL) |
 			    REG_STARTEND);
 			if (eval == REG_NOMATCH)
Index: external/bsd/nvi/dist/ex/ex_global.c
===================================================================
RCS file: /home/netbsd/src/external/bsd/nvi/dist/ex/ex_global.c,v
retrieving revision 1.5
diff -u -r1.5 ex_global.c
--- external/bsd/nvi/dist/ex/ex_global.c	26 Jan 2014 21:43:45 -0000	1.5
+++ external/bsd/nvi/dist/ex/ex_global.c	13 Nov 2017 02:42:49 -0000
@@ -216,7 +216,7 @@
 		match[0].rm_so = 0;
 		match[0].rm_eo = len;
 		switch (eval =
-		    regexec(&sp->re_c, dbp, 0, match, REG_STARTEND)) {
+		    REGEXEC(&sp->re_c, dbp, 0, match, REG_STARTEND)) {
 		case 0:
 			if (cmd == V)
 				continue;
Index: external/bsd/nvi/dist/ex/ex_subst.c
===================================================================
RCS file: /home/netbsd/src/external/bsd/nvi/dist/ex/ex_subst.c,v
retrieving revision 1.4
diff -u -r1.4 ex_subst.c
--- external/bsd/nvi/dist/ex/ex_subst.c	26 Jan 2014 21:43:45 -0000	1.4
+++ external/bsd/nvi/dist/ex/ex_subst.c	13 Nov 2017 02:42:49 -0000
@@ -557,7 +557,7 @@
 		match[0].rm_eo = len;
 
 		/* Get the next match. */
-		eval = regexec(re, st + offset, 10, match, eflags);
+		eval = REGEXEC(re, st + offset, 10, match, eflags);
 
 		/*
 		 * There wasn't a match or if there was an error, deal with
@@ -992,7 +992,7 @@
 	 * Regcomp isn't 8-bit clean, so we just lost if the pattern
 	 * contained a nul.  Bummer!
 	 */
-	if ((rval = regcomp(rep, ptrn, /* plen, */ reflags)) != 0) {
+	if ((rval = REGCOMP(rep, ptrn, /* plen, */ reflags)) != 0) {
 		if (LF_ISSET(SEARCH_MSG))
 			re_error(sp, rval, rep); 
 		return (1);
Index: external/bsd/nvi/usr.bin/nvi/Makefile
===================================================================
RCS file: /cvsroot/src/external/bsd/nvi/usr.bin/nvi/Makefile,v
retrieving revision 1.12
diff -u -r1.12 Makefile
--- external/bsd/nvi/usr.bin/nvi/Makefile	13 Nov 2017 04:09:41 -0000	1.12
+++ external/bsd/nvi/usr.bin/nvi/Makefile	13 Nov 2017 04:10:29 -0000
@@ -1,8 +1,12 @@
-#	$NetBSD: Makefile,v 1.12 2017/11/13 04:09:41 rin Exp $
+#	$NetBSD: Makefile,v 1.11 2017/11/13 02:33:13 rin Exp $
 
 .include <bsd.own.mk>
 
+.ifndef SMALLPROG
 USE_WIDECHAR?=yes
+.else
+USE_WIDECHAR=no
+.endif
 
 CWARNFLAGS.clang+=	-Wno-uninitialized -Wno-format-security
 .if ${USE_WIDECHAR} != "yes"
@@ -57,8 +61,12 @@
 
 # For wide char support
 .if ${USE_WIDECHAR} == "yes"
-SRCS+=	regcomp.c regerror.c regexec.c regfree.c
-CPPFLAGS+=-I${DIST}/regex -D__REGEX_PRIVATE -DUSE_WIDECHAR
+CPPFLAGS+=-DUSE_WIDECHAR
+CPPFLAGS+=-I${DESTDIR}/usr/include/tre -DHAVE_TRE
+LDADD+=	-ltre
+DPADD+=	${LIBTRE}
+#SRCS+=	regcomp.c regerror.c regexec.c regfree.c
+#CPPFLAGS+=-I${DIST}/regex -D__REGEX_PRIVATE
 .endif
 
 # For db3 db1 emulation
Index: external/bsd/tre/Makefile.inc
===================================================================
RCS file: /cvsroot/src/external/bsd/tre/Makefile.inc,v
retrieving revision 1.2
diff -u -r1.2 Makefile.inc
--- external/bsd/tre/Makefile.inc	5 Nov 2011 22:39:12 -0000	1.2
+++ external/bsd/tre/Makefile.inc	12 Nov 2017 12:09:46 -0000
@@ -7,7 +7,5 @@
 CPPFLAGS+=	-I${TREDIST}/lib -I${.CURDIR}/../include
 CPPFLAGS+=	-DHAVE_CONFIG_H=1
 CPPFLAGS+=	-DTRE_SYSTEM_REGEX_H_PATH=\"${NETBSDSRCDIR}/include/regex.h\"
-CPPFLAGS+=	-DTRE_USE_SYSTEM_REGEX_H=1
-
 
 WARNS=	4
Index: external/bsd/tre/include/tre-config.h
===================================================================
RCS file: /cvsroot/src/external/bsd/tre/include/tre-config.h,v
retrieving revision 1.1
diff -u -r1.1 tre-config.h
--- external/bsd/tre/include/tre-config.h	5 Nov 2011 22:39:13 -0000	1.1
+++ external/bsd/tre/include/tre-config.h	12 Nov 2017 12:09:46 -0000
@@ -23,10 +23,12 @@
 #define TRE_MULTIBYTE 1
 
 /* Define to the absolute path to the system tre.h */
-/* #undef TRE_SYSTEM_REGEX_H_PATH */
+#ifndef TRE_SYSTEM_REGEX_H_PATH
+#define TRE_SYSTEM_REGEX_H_PATH "/usr/include/regex.h"
+#endif
 
 /* Define to include the system regex.h from tre.h */
-/* #undef TRE_USE_SYSTEM_REGEX_H */
+#define TRE_USE_SYSTEM_REGEX_H
 
 /* Define to enable wide character (wchar_t) support. */
 #define TRE_WCHAR 1
Index: external/bsd/tre/lib/Makefile
===================================================================
RCS file: /cvsroot/src/external/bsd/tre/lib/Makefile,v
retrieving revision 1.2
diff -u -r1.2 Makefile
--- external/bsd/tre/lib/Makefile	6 Nov 2011 10:55:27 -0000	1.2
+++ external/bsd/tre/lib/Makefile	12 Nov 2017 12:09:46 -0000
@@ -5,10 +5,11 @@
 #	./configure --prefix=/usr --without-alloca
 
 .include <bsd.own.mk>
-TREDIST=	${.CURDIR}/../dist
+TREDIR=	${.CURDIR}/..
 
 # external tre sources
-.PATH: ${TREDIST}/lib
+.PATH:	${TREDIR}/dist/lib
+.PATH:	${TREDIR}/include
 
 CPPFLAGS+=	-I${.CURDIR}
 
@@ -19,6 +20,9 @@
 SRCS+=	tre-match-backtrack.c tre-match-parallel.c tre-mem.c
 SRCS+=	tre-parse.c tre-stack.c xmalloc.c
 
+INCS=	regex.h tre-config.h tre.h
+INCSDIR=/usr/include/tre
+
 WARNS=	4
 
 .include <bsd.lib.mk>
Index: rescue/Makefile
===================================================================
RCS file: /home/netbsd/src/rescue/Makefile,v
retrieving revision 1.33
diff -u -r1.33 Makefile
--- rescue/Makefile	8 Oct 2017 15:02:33 -0000	1.33
+++ rescue/Makefile	12 Nov 2017 15:06:26 -0000
@@ -20,10 +20,12 @@
 CRUNCHBIN=	rescue
 CRUNCHENV=	RESCUEDIR=${RESCUEDIR}
 SMALLPROG=	0
-LISTS=		${.CURDIR}/list
 TARGETDIR=	${DESTDIR}/rescue
 PARSELISTENV+=  TARGETDIR=${TARGETDIR:Q}
 
+LISTS=		${.CURDIR}/list
+CRUNCHENV+=	USE_WIDECHAR=no		# for nvi
+
 .for f in ldconfig
 PROG_${f}!=	cd ${NETBSDSRCDIR}/sbin/${f} && ${MAKE} -V PROG
 .if (${PROG_${f}} != "")


Home | Main Index | Thread Index | Old Index