Subject: bin/34244: '/usr/bin/cut -c' is not utf-8 ready
To: None <gnats-admin@netbsd.org, netbsd-bugs@netbsd.org>
From: Aleksey Cheusov <cheusov@tut.by>
List: netbsd-bugs
Date: 08/20/2006 17:40:01
>Number: 34244
>Category: bin
>Synopsis: '/usr/bin/cut -c' is not utf-8 ready
>Confidential: no
>Severity: non-critical
>Priority: medium
>Responsible: bin-bug-people
>State: open
>Class: sw-bug
>Submitter-Id: net
>Arrival-Date: Sun Aug 20 17:40:00 +0000 2006
>Originator: Aleksey Cheusov <cheusov@tut.by>
>Release: NetBSD 3.0_STABLE
>Organization:
Best regards, Aleksey Cheusov.
>Environment:
System: NetBSD chen.chizhovka.net 3.0_STABLE NetBSD 3.0_STABLE (GENERIC) #2: Sun Mar 12 12:49:58 GMT 2006 cheusov@chen:/usr/src/sys/arch/i386/compile/GENERIC i386
Architecture: i386
Machine: i386
>Description:
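'cut -c' is currently implemented identically to 'cut -b': it selects
bytes rather than characters, and therefore doesn't handle multibyte
character sets (including UTF-8) correctly.
The patch below keeps the existing byte-oriented code as b_cut() for '-b'
and reimplements c_cut() for '-c' using getwc()/putwc().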
>Fix:
--=-=-=
Content-Type: text/x-patch
Content-Disposition: attachment; filename=cut-c.utf8.patch
Content-Description: patch for cut -c
Index: cut.c
===================================================================
RCS file: /cvsroot/src/usr.bin/cut/cut.c,v
retrieving revision 1.21
diff -u -r1.21 cut.c
--- cut.c 29 Jul 2006 02:01:24 -0000 1.21
+++ cut.c 20 Aug 2006 16:32:32 -0000
@@ -54,13 +54,16 @@
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
+#include <wchar.h>
+int bflag;
int cflag;
char dchar;
int dflag;
int fflag;
int sflag;
+void b_cut(FILE *, const char *);
void c_cut(FILE *, const char *);
void f_cut(FILE *, const char *);
void get_list(char *);
@@ -83,6 +86,10 @@
while ((ch = getopt(argc, argv, "b:c:d:f:sn")) != -1)
switch(ch) {
case 'b':
+ fcn = b_cut;
+ get_list(optarg);
+ bflag = 1;
+ break;
case 'c':
fcn = c_cut;
get_list(optarg);
@@ -110,9 +117,11 @@
argv += optind;
if (fflag) {
- if (cflag)
+ if (cflag || bflag)
usage();
- } else if (!cflag || dflag || sflag)
+ } else if ((!cflag && !bflag) || dflag || sflag)
+ usage();
+ else if (bflag && cflag)
usage();
if (*argv)
@@ -193,7 +202,7 @@
/* ARGSUSED */
void
-c_cut(FILE *fp, const char *fname)
+b_cut(FILE *fp, const char *fname)
{
int ch, col;
char *pos;
@@ -221,6 +230,35 @@
}
void
+c_cut(FILE *fp, const char *fname)
+{
+ wint_t ch;
+ int col;
+ char *pos;
+
+ ch = 0;
+ for (;;) {
+ pos = positions + 1;
+ for (col = maxval; col; --col) {
+ if ((ch = getwc(fp)) == WEOF)
+ return;
+ if (ch == '\n')
+ break;
+ if (*pos++)
+ (void)putwc(ch, stdout);
+ }
+ if (ch != '\n') {
+ if (autostop)
+ while ((ch = getwc(fp)) != WEOF && ch != '\n')
+ (void)putwc(ch, stdout);
+ else
+ while ((ch = getwc(fp)) != WEOF && ch != '\n');
+ }
+ (void)putwc('\n', stdout);
+ }
+}
+
+void
f_cut(FILE *fp, const char *fname)
{
int ch, field, isdelim;
--=-=-=--
>Unformatted:
--=-=-=