NetBSD-Bugs archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
Re: lib/52374: <uchar.h> missing
The following reply was made to PR lib/52374; it has been noted by GNATS.
From: Taylor R Campbell <riastradh%NetBSD.org@localhost>
To: gnats-bugs%NetBSD.org@localhost, netbsd-bugs%NetBSD.org@localhost
Cc: wiz%NetBSD.org@localhost
Subject: Re: lib/52374: <uchar.h> missing
Date: Wed, 14 Aug 2024 21:17:05 +0000
This is a multi-part message in MIME format.
--=_iI1xPvve3uCYqEU41Ybw8A5VS88pKDrs
I drafted an implementation here, using automatic tests from FreeBSD:
https://anonhg.netbsd.org/src-draft/log/bb62e596ad68
Requires libc minor bump for new symbols. Squashed patch attached for
review (changeset series is messy, not to be committed as is).
Planning to commit later this week. Doesn't do _l variants at the
moment, but that shouldn't be hard to add.
--=_iI1xPvve3uCYqEU41Ybw8A5VS88pKDrs
Content-Type: text/plain; charset="ISO-8859-1"; name="pr52374-ucharh"
Content-Transfer-Encoding: quoted-printable
Content-Disposition: attachment; filename="pr52374-ucharh.patch"
diff -r 529c5dc3c6e7 distrib/sets/lists/base/shl.mi
--- a/distrib/sets/lists/base/shl.mi Mon Aug 12 21:49:59 2024 +0000
+++ b/distrib/sets/lists/base/shl.mi Wed Aug 14 21:13:10 2024 +0000
@@ -22,7 +22,7 @@
./lib/libblocklist.so.0.1 base-sys-shlib dynamicroot
./lib/libc.so base-sys-shlib dynamicroot
./lib/libc.so.12 base-sys-shlib dynamicroot
-./lib/libc.so.12.221 base-sys-shlib dynamicroot
+./lib/libc.so.12.222 base-sys-shlib dynamicroot
./lib/libcrypt.so base-sys-shlib dynamicroot
./lib/libcrypt.so.1 base-sys-shlib dynamicroot
./lib/libcrypt.so.1.0 base-sys-shlib dynamicroot
@@ -257,7 +257,7 @@
./usr/lib/libc++.so.1.0 base-sys-shlib compatfile,libcxx
./usr/lib/libc.so base-sys-shlib compatfile
./usr/lib/libc.so.12 base-sys-shlib compatfile
-./usr/lib/libc.so.12.221 base-sys-shlib compatfile
+./usr/lib/libc.so.12.222 base-sys-shlib compatfile
./usr/lib/libcbor.so base-sys-shlib compatfile
./usr/lib/libcbor.so.0 base-sys-shlib compatfile
./usr/lib/libcbor.so.0.5 base-sys-shlib compatfile
diff -r 529c5dc3c6e7 distrib/sets/lists/comp/mi
--- a/distrib/sets/lists/comp/mi Mon Aug 12 21:49:59 2024 +0000
+++ b/distrib/sets/lists/comp/mi Wed Aug 14 21:13:10 2024 +0000
@@ -3679,6 +3679,7 @@
./usr/include/tss/tss_typedef.h comp-c-include tpm
./usr/include/ttyent.h comp-c-include
./usr/include/tzfile.h comp-c-include
+./usr/include/uchar.h comp-c-include
./usr/include/ucontext.h comp-c-include
./usr/include/ufs/ext2fs/ext2fs.h comp-c-include
./usr/include/ufs/ext2fs/ext2fs_dinode.h comp-c-include
diff -r 529c5dc3c6e7 distrib/sets/lists/debug/mi
--- a/distrib/sets/lists/debug/mi Mon Aug 12 21:49:59 2024 +0000
+++ b/distrib/sets/lists/debug/mi Wed Aug 14 21:13:10 2024 +0000
@@ -2058,11 +2058,15 @@
./usr/libdata/debug/usr/tests/lib/libc/inet/t_inet_network.debug tests-lib=
-debug debug,atf,compattestfile
./usr/libdata/debug/usr/tests/lib/libc/kevent_nullmnt/h_nullmnt.debug test=
s-lib-debug debug,atf,compattestfile
./usr/libdata/debug/usr/tests/lib/libc/locale/t_btowc.debug tests-lib-deb=
ug debug,atf,compattestfile
+./usr/libdata/debug/usr/tests/lib/libc/locale/t_c16rtomb.debug tests-lib-=
debug debug,atf,compattestfile
+./usr/libdata/debug/usr/tests/lib/libc/locale/t_c32rtomb.debug tests-lib-=
debug debug,atf,compattestfile
./usr/libdata/debug/usr/tests/lib/libc/locale/t_ctype1.debug tests-obsole=
te obsolete,compattestfile
./usr/libdata/debug/usr/tests/lib/libc/locale/t_ctype2.debug tests-obsole=
te obsolete,compattestfile
./usr/libdata/debug/usr/tests/lib/libc/locale/t_digittoint.debug tests-lib=
-debug debug,atf,compattestfile
./usr/libdata/debug/usr/tests/lib/libc/locale/t_ducet.debug tests-lib-deb=
ug debug,atf,compattestfile
./usr/libdata/debug/usr/tests/lib/libc/locale/t_io.debug tests-lib-debug =
debug,atf,compattestfile
+./usr/libdata/debug/usr/tests/lib/libc/locale/t_mbrtoc16.debug tests-lib-=
debug debug,atf,compattestfile
+./usr/libdata/debug/usr/tests/lib/libc/locale/t_mbrtoc32.debug tests-lib-=
debug debug,atf,compattestfile
./usr/libdata/debug/usr/tests/lib/libc/locale/t_mbrtowc.debug tests-lib-d=
ebug debug,atf,compattestfile
./usr/libdata/debug/usr/tests/lib/libc/locale/t_mbsnrtowcs.debug tests-lib=
-debug debug,atf,compattestfile
./usr/libdata/debug/usr/tests/lib/libc/locale/t_mbstowcs.debug tests-lib-=
debug debug,atf,compattestfile
@@ -2070,6 +2074,7 @@
./usr/libdata/debug/usr/tests/lib/libc/locale/t_sprintf.debug tests-lib-d=
ebug debug,atf,compattestfile
./usr/libdata/debug/usr/tests/lib/libc/locale/t_strfmon.debug tests-lib-d=
ebug debug,atf,compattestfile
./usr/libdata/debug/usr/tests/lib/libc/locale/t_toupper.debug tests-lib-d=
ebug debug,atf,compattestfile
+./usr/libdata/debug/usr/tests/lib/libc/locale/t_uchar.debug tests-lib-deb=
ug debug,atf,compattestfile
./usr/libdata/debug/usr/tests/lib/libc/locale/t_wcscoll.debug tests-lib-d=
ebug debug,atf,compattestfile
./usr/libdata/debug/usr/tests/lib/libc/locale/t_wcscspn.debug tests-lib-d=
ebug debug,atf,compattestfile
./usr/libdata/debug/usr/tests/lib/libc/locale/t_wcspbrk.debug tests-lib-d=
ebug debug,atf,compattestfile
diff -r 529c5dc3c6e7 distrib/sets/lists/tests/mi
--- a/distrib/sets/lists/tests/mi Mon Aug 12 21:49:59 2024 +0000
+++ b/distrib/sets/lists/tests/mi Wed Aug 14 21:13:10 2024 +0000
@@ -3073,11 +3073,15 @@
./usr/tests/lib/libc/locale/Atffile tests-lib-tests compattestfile,atf
./usr/tests/lib/libc/locale/Kyuafile tests-lib-tests compattestfile,atf=
,kyua
./usr/tests/lib/libc/locale/t_btowc tests-lib-tests compattestfile,atf
+./usr/tests/lib/libc/locale/t_c16rtomb tests-lib-tests compattestfile,a=
tf
+./usr/tests/lib/libc/locale/t_c32rtomb tests-lib-tests compattestfile,a=
tf
./usr/tests/lib/libc/locale/t_ctype1 tests-obsolete obsolete
./usr/tests/lib/libc/locale/t_ctype2 tests-obsolete obsolete
./usr/tests/lib/libc/locale/t_digittoint tests-lib-tests compattestfile,=
atf
./usr/tests/lib/libc/locale/t_ducet tests-lib-tests compattestfile,atf
./usr/tests/lib/libc/locale/t_io tests-lib-tests compattestfile,atf
+./usr/tests/lib/libc/locale/t_mbrtoc16 tests-lib-tests compattestfile,a=
tf
+./usr/tests/lib/libc/locale/t_mbrtoc32 tests-lib-tests compattestfile,a=
tf
./usr/tests/lib/libc/locale/t_mbrtowc tests-lib-tests compattestfile,atf
./usr/tests/lib/libc/locale/t_mbsnrtowcs tests-lib-tests compattestfile,=
atf
./usr/tests/lib/libc/locale/t_mbstowcs tests-lib-tests compattestfile,a=
tf
@@ -3085,6 +3089,7 @@
./usr/tests/lib/libc/locale/t_sprintf tests-lib-tests compattestfile,atf
./usr/tests/lib/libc/locale/t_strfmon tests-lib-tests compattestfile,atf
./usr/tests/lib/libc/locale/t_toupper tests-lib-tests compattestfile,atf
+./usr/tests/lib/libc/locale/t_uchar tests-lib-tests compattestfile,atf
./usr/tests/lib/libc/locale/t_wcscoll tests-lib-tests compattestfile,atf
./usr/tests/lib/libc/locale/t_wcscspn tests-lib-tests compattestfile,atf
./usr/tests/lib/libc/locale/t_wcspbrk tests-lib-tests compattestfile,atf
diff -r 529c5dc3c6e7 include/Makefile
--- a/include/Makefile Mon Aug 12 21:49:59 2024 +0000
+++ b/include/Makefile Wed Aug 14 21:13:10 2024 +0000
@@ -21,6 +21,7 @@ INCS=3D a.out.h aio.h ar.h assert.h atomic
string.h sgtty.h signal.h spawn.h stab.h stdalign.h stddef.h stdio.h \
stdlib.h stdnoreturn.h strings.h stringlist.h struct.h sysexits.h \
tar.h time.h ttyent.h tzfile.h \
+ uchar.h \
ucontext.h ulimit.h unistd.h util.h utime.h utmp.h utmpx.h uuid.h \
vis.h wchar.h wctype.h wordexp.h
INCS+=3D arpa/ftp.h arpa/inet.h arpa/nameser.h arpa/nameser_compat.h \
diff -r 529c5dc3c6e7 include/uchar.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/include/uchar.h Wed Aug 14 21:13:10 2024 +0000
@@ -0,0 +1,82 @@
+/* $NetBSD$ */
+
+/*-
+ * Copyright (c) 2024 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTO=
RS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIM=
ITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICU=
LAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTO=
RS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF =
THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * C11, 7.28: Unicode utilities <uchar.h>
+ *
+ * `1. The header <uchar.h> declares types and functions for
+ * manipulating Unicode characters.'
+ */
+
+#ifndef _UCHAR_H
+#define _UCHAR_H
+
+#include <sys/ansi.h>
+
+/*
+ * `2. The types declared are mbstate_t (described in 7.30.1) and
+ * size_t (described in 7.19);
+ *
+ * char16_t
+ *
+ * which is an unsigned integer type used for 16-bit
+ * characters and is the same type as uint_least16_t
+ * (described in 7.20.1.2); and
+ *
+ * char32_t
+ *
+ * which is an unsigned integer type used for 32-bit
+ * characters and is the same type as uint_least32_t (also
+ * described in 7.20.1.2).'
+ */
+
+#ifdef _BSD_MBSTATE_T_
+typedef _BSD_MBSTATE_T_ mbstate_t;
+#undef _BSD_MBSTATE_T_ /* so the typedef is not repeated */
+#endif
+
+#ifdef _BSD_SIZE_T_
+typedef _BSD_SIZE_T_ size_t;
+#undef _BSD_SIZE_T_ /* so the typedef is not repeated */
+#endif
+
+typedef __UINT_LEAST16_TYPE__ char16_t; /* XXX C++11 keyword; guard? */
+typedef __UINT_LEAST32_TYPE__ char32_t; /* XXX C++11 keyword; guard? */
+
+__BEGIN_DECLS
+
+size_t mbrtoc16(char16_t *__restrict, const char *__restrict, size_t,
+ mbstate_t *__restrict);
+size_t c16rtomb(char *__restrict, char16_t, mbstate_t *__restrict);
+size_t mbrtoc32(char32_t *__restrict, const char *__restrict, size_t,
+ mbstate_t *__restrict);
+size_t c32rtomb(char *__restrict, char32_t, mbstate_t *__restrict);
+
+__END_DECLS
+
+#endif /* _UCHAR_H */
diff -r 529c5dc3c6e7 lib/libc/locale/Makefile.inc
--- a/lib/libc/locale/Makefile.inc Mon Aug 12 21:49:59 2024 +0000
+++ b/lib/libc/locale/Makefile.inc Wed Aug 14 21:13:10 2024 +0000
@@ -11,6 +11,13 @@ SRCS+=3D setlocale.c __mb_cur_max.c \
wcstol.c wcstoll.c wcstoimax.c wcstoul.c wcstoull.c wcstoumax.c \
wcstod.c wcstof.c wcstold.c wcscoll.c wcsxfrm.c wcsftime.c
=20
+SRCS+=3D c16rtomb.c
+SRCS+=3D c32rtomb.c
+SRCS+=3D mbrtoc16.c
+SRCS+=3D mbrtoc32.c
+CPPFLAGS.c32rtomb.c+=3D -I${LIBCDIR}/citrus
+CPPFLAGS.mbrtoc32.c+=3D -I${LIBCDIR}/citrus
+
# citrus multibyte locale support
# we have quirk for libc.a - see the last part of lib/libc/Makefile
CPPFLAGS+=3D -DWITH_RUNE -I${.CURDIR}
diff -r 529c5dc3c6e7 lib/libc/locale/c16rtomb.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/libc/locale/c16rtomb.c Wed Aug 14 21:13:10 2024 +0000
@@ -0,0 +1,164 @@
+/* $NetBSD$ */
+
+/*-
+ * Copyright (c) 2024 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTO=
RS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIM=
ITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICU=
LAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTO=
RS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF =
THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * c16rtomb(s, c16, ps)
+ *
+ * Encode the Unicode UTF-16 code unit c16, which may be a surrogate
+ * code point, into the multibyte buffer s under the current
+ * locale, using multibyte encoding state ps.
+ *
+ * If c16 is a high surrogate, no output will be produced, but c16
+ * will be remembered; this must be followed by another call
+ * passing the trailing low surrogate.
+ *
+ * If c16 is a low surrogate, it must have been preceded by a call
+ * with the leading high surrogate; at this point the combined
+ * code point will be produced as output.
+ *
+ * Return the number of bytes stored on success, or (size_t)-1 on
+ * error with errno set to EILSEQ.
+ *
+ * At most MB_CUR_MAX bytes will be stored.
+ *
+ * References:
+ *
+ * The Unicode Standard, Version 15.0 -- Core Specification, The
+ * Unicode Consortium, Sec. 3.8 `Surrogates', p. 119.
+ * https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf=
#page=3D144
+ * https://web.archive.org/web/20240718101254/https://www.unicode.org/vers=
ions/Unicode15.0.0/UnicodeStandard-15.0.pdf#page=3D144
+ *
+ * The Unicode Standard, Version 15.0 -- Core Specification, The
+ * Unicode Consortium, Sec. 3.9 `Unicode Encoding Forms': UTF-16,
+ * p. 124.
+ * https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf=
#page=3D150
+ * https://web.archive.org/web/20240718101254/https://www.unicode.org/vers=
ions/Unicode15.0.0/UnicodeStandard-15.0.pdf#page=3D150
+ *
+ * P. Hoffman and F. Yergeau, `UTF-16, an encoding of ISO 10646',
+ * RFC 2781, Internet Engineering Task Force, February 2000,
+ * Sec. 2.2: `Decoding UTF-16'.
+ * https://datatracker.ietf.org/doc/html/rfc2781#section-2.2
+ */
+
+#include <sys/cdefs.h>
+__RCSID("$NetBSD$");
+
+#include <assert.h>
+#include <errno.h>
+#include <limits.h>
+#include <stddef.h>
+#include <uchar.h>
+
+#include "c32rtomb.h"
+
+struct c16rtombstate { /* overlaid on the caller's mbstate_t */
+ char16_t surrogate; /* pending high surrogate, or 0 if none */
+ mbstate_t mbs; /* state handed on to c32rtomb */
+};
+__CTASSERT(sizeof(struct c32rtombstate) <=3D sizeof(mbstate_t) -
+ offsetof(struct c16rtombstate, mbs)); /* room left after surrogate */
+__CTASSERT(_Alignof(struct c16rtombstate) <=3D _Alignof(mbstate_t));
+
+size_t
+c16rtomb(char *restrict s, char16_t c16, mbstate_t *restrict ps)
+{
+ static mbstate_t psbuf; /* used when ps is NULL */
+ char buf[MB_LEN_MAX]; /* scratch output when s is NULL */
+ struct c16rtombstate *S;
+ char32_t c32;
+
+ /*
+ * `If ps is a null pointer, each function uses its own
+ * internal mbstate_t object instead, which is initialized at
+ * program startup to the initial conversion state; the
+ * functions are not required to avoid data races with other
+ * calls to the same function in this case. The
+ * implementation behaves as if no library function calls
+ * these functions with a null pointer for ps.'
+ */
+ if (ps =3D=3D NULL)
+ ps =3D &psbuf;
+
+ /*
+ * `If s is a null pointer, the c16rtomb function is equivalent
+ * to the call
+ *
+ * c16rtomb(buf, L'\0', ps)
+ *
+ * where buf is an internal buffer.'
+ */
+ if (s =3D=3D NULL) {
+ s =3D buf;
+ c16 =3D L'\0'; /* XXX EILSEQ below if a surrogate is pending */
+ }
+
+ /*
+ * View ps as the private state (pending surrogate + nested state).
+ */
+ S =3D (struct c16rtombstate *)ps;
+
+ /*
+ * Check whether:
+ *
+ * 1. We had previously decoded a high surrogate.
+ * =3D> Decode the low surrogate -- reject if it's not a low
+ * surrogate -- and combine them to output a code point;
+ * clear the high surrogate for next time.
+ * 2. This is a high surrogate.
+ * =3D> Save it and wait for the low surrogate with no output.
+ * 3. This is a low surrogate.
+ * =3D> Reject.
+ * 4. This is not a surrogate.
+ * =3D> Output a code point.
+ */
+ if (S->surrogate !=3D 0) { /* 1. pending surrogate pair */
+ if (c16 < 0xdc00 || c16 > 0xdfff) {
+ errno =3D EILSEQ;
+ return (size_t)-1;
+ }
+ const char16_t w1 =3D S->surrogate;
+ const char16_t w2 =3D c16;
+ c32 =3D __SHIFTIN(__SHIFTOUT(w1, __BITS(9,0)), __BITS(19,10)) |
+ __SHIFTIN(__SHIFTOUT(w2, __BITS(9,0)), __BITS(9,0));
+ c32 +=3D 0x10000; /* pair encodes the code point minus 0x10000 */
+ S->surrogate =3D 0;
+ } else if (c16 >=3D 0xd800 && c16 <=3D 0xdbff) { /* 2. high surrogate */
+ S->surrogate =3D c16;
+ return 0; /* produced nothing */
+ } else if (c16 >=3D 0xdc00 && c16 <=3D 0xdfff) { /* 3. low surrogate */
+ errno =3D EILSEQ;
+ return (size_t)-1;
+ } else { /* 4. not a surrogate */
+ c32 =3D c16;
+ }
+
+ /*
+ * We have a code point. Output it.
+ */
+ return c32rtomb(s, c32, &S->mbs);
+}
diff -r 529c5dc3c6e7 lib/libc/locale/c32rtomb.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/libc/locale/c32rtomb.c Wed Aug 14 21:13:10 2024 +0000
@@ -0,0 +1,152 @@
+/* $NetBSD$ */
+
+/*-
+ * Copyright (c) 2024 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTO=
RS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIM=
ITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICU=
LAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTO=
RS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF =
THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * c32rtomb(s, c32, ps)
+ *
+ * Encode the Unicode UTF-32 code unit c32, which must not be a
+ * surrogate code point, into the multibyte buffer s under the
+ * current locale, using multibyte encoding state ps. A UTF-32
+ * code unit is also a Unicode scalar value, which is any Unicode
+ * code point except a surrogate.
+ *
+ * Return the number of bytes stored on success, or (size_t)-1 on
+ * error with errno set to EILSEQ.
+ *
+ * At most MB_CUR_MAX bytes will be stored.
+ *
+ * References:
+ *
+ * The Unicode Standard, Version 15.0 -- Core Specification, The
+ * Unicode Consortium, Sec. 3.8 `Surrogates', p. 119.
+ * https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf
+ * https://web.archive.org/web/20240718101254/https://www.unicode.org/vers=
ions/Unicode15.0.0/UnicodeStandard-15.0.pdf
+ */
+
+#include <sys/cdefs.h>
+__RCSID("$NetBSD$");
+
+#include <sys/types.h> /* broken citrus_*.h */
+#include <sys/queue.h> /* broken citrus_*.h */
+
+#include <assert.h>
+#include <errno.h>
+#include <langinfo.h>
+#include <limits.h>
+#include <paths.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <uchar.h>
+#include <wchar.h>
+
+#include "citrus_types.h" /* broken citrus_iconv.h */
+#include "citrus_module.h" /* broken citrus_iconv.h */
+#include "citrus_hash.h" /* broken citrus_iconv.h */
+#include "citrus_iconv.h"
+
+size_t
+c32rtomb(char *restrict s, char32_t c32, mbstate_t *restrict ps)
+{
+ char buf[MB_LEN_MAX]; /* scratch output when s is NULL */
+ struct _citrus_iconv *iconv =3D NULL; /* stays NULL until opened */
+ char srcbuf[4]; /* c32 as UTF-32LE bytes */
+ const char *src;
+ char *dst;
+ size_t srcleft, dstleft, inval, len;
+ int error;
+
+ /*
+ * `If s is a null pointer, the c32rtomb function is equivalent
+ * to the call
+ *
+ * c32rtomb(buf, L'\0', ps)
+ *
+ * where buf is an internal buffer.'
+ */
+ if (s =3D=3D NULL) {
+ s =3D buf;
+ c32 =3D L'\0';
+ }
+
+ /*
+ * Reject surrogates. (No range check here for c32 > 0x10ffff.)
+ */
+ if (c32 >=3D 0xd800 && c32 <=3D 0xdfff) {
+ errno =3D EILSEQ;
+ len =3D (size_t)-1;
+ goto out;
+ }
+
+ /*
+ * Open an iconv handle to convert UTF-32LE to locale-dependent
+ * multibyte output.
+ */
+ if ((error =3D _citrus_iconv_open(&iconv, _PATH_ICONV, "utf-32le",
+ nl_langinfo(CODESET))) !=3D 0) {
+ errno =3D EIO; /* XXX? */
+ len =3D (size_t)-1;
+ goto out;
+ }
+
+ /*
+ * Convert from UTF-32LE in our buffer.
+ */
+ le32enc(srcbuf, c32);
+ src =3D srcbuf;
+ srcleft =3D sizeof(srcbuf);
+ dst =3D s;
+ dstleft =3D MB_CUR_MAX;
+ error =3D _citrus_iconv_convert(iconv,
+ &src, &srcleft,
+ &dst, &dstleft,
+ _CITRUS_ICONV_F_HIDE_INVALID, &inval);
+ if (error) { /* can't be incomplete, must be error */
+ errno =3D error; /* XXX may not be EILSEQ -- confirm */
+ len =3D (size_t)-1;
+ goto out;
+ }
+ _DIAGASSERT(srcleft =3D=3D 0);
+ _DIAGASSERT(dstleft <=3D MB_CUR_MAX);
+
+ /*
+ * If we didn't produce any output, that means the code point
+ * can't be encoded in the current locale, so treat it as
+ * EILSEQ.
+ */
+ len =3D MB_CUR_MAX - dstleft;
+ if (len =3D=3D 0) {
+ errno =3D EILSEQ;
+ len =3D (size_t)-1;
+ goto out;
+ }
+
+out: error =3D errno; /* keep errno across the close */
+ _citrus_iconv_close(iconv);
+ errno =3D error;
+ return len;
+}
diff -r 529c5dc3c6e7 lib/libc/locale/mbrtoc16.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/libc/locale/mbrtoc16.c Wed Aug 14 21:13:10 2024 +0000
@@ -0,0 +1,191 @@
+/* $NetBSD$ */
+
+/*-
+ * Copyright (c) 2024 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTO=
RS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIM=
ITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICU=
LAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTO=
RS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF =
THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * mbrtoc16(&c16, s, n, ps)
+ *
+ * Decode a Unicode code point from up to n bytes out of the
+ * multibyte string s, using multibyte encoding state ps, and
+ * store the next code unit in the UTF-16 representation of that
+ * code point at c16.
+ *
+ * If the next code point in s is outside the Basic Multilingual
+ * Plane, mbrtoc16 will yield the high surrogate in one call that
+ * consumes input, and will yield the low surrogate in the next
+ * call without consuming any input and returning (size_t)-3
+ * instead.
+ *
+ * Return the number of bytes consumed on success, or:
+ *
+ * - 0 if the code unit is NUL, or
+ * - (size_t)-3 if the trailing low surrogate of a surrogate pair
+ * was returned without consuming any additional input, or
+ * - (size_t)-2 if the input is incomplete, or
+ * - (size_t)-1 on error with errno set to EILSEQ.
+ *
+ * In the case of incomplete input, the decoding state so far
+ * after processing s[0], s[1], ..., s[n - 1] is saved in ps, so
+ * subsequent calls to mbrtoc16 will pick up n bytes later into
+ * the input stream.
+ *
+ * References:
+ *
+ * The Unicode Standard, Version 15.0 -- Core Specification, The
+ * Unicode Consortium, Sec. 3.8 `Surrogates', p. 119.
+ * https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf=
#page=3D144
+ * https://web.archive.org/web/20240718101254/https://www.unicode.org/vers=
ions/Unicode15.0.0/UnicodeStandard-15.0.pdf#page=3D144
+ *
+ * The Unicode Standard, Version 15.0 -- Core Specification, The
+ * Unicode Consortium, Sec. 3.9 `Unicode Encoding Forms': UTF-16,
+ * p. 124.
+ * https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf=
#page=3D150
+ * https://web.archive.org/web/20240718101254/https://www.unicode.org/vers=
ions/Unicode15.0.0/UnicodeStandard-15.0.pdf#page=3D150
+ *
+ * P. Hoffman and F. Yergeau, `UTF-16, an encoding of ISO 10646',
+ * RFC 2781, Internet Engineering Task Force, February 2000,
+ * Sec. 2.1: `Encoding UTF-16'.
+ * https://datatracker.ietf.org/doc/html/rfc2781#section-2.1
+ */
+
+#include <sys/cdefs.h>
+__RCSID("$NetBSD$");
+
+#include <assert.h>
+#include <errno.h>
+#include <stddef.h>
+#include <uchar.h>
+
+#include "mbrtoc32.h"
+
+struct mbrtoc16state { /* overlaid on the caller's mbstate_t */
+ char16_t surrogate; /* pending low surrogate, or 0 if none */
+ mbstate_t mbs; /* state handed on to mbrtoc32 */
+};
+__CTASSERT(sizeof(struct mbrtoc32state) <=3D sizeof(mbstate_t) -
+ offsetof(struct mbrtoc16state, mbs)); /* room left after surrogate */
+__CTASSERT(_Alignof(struct mbrtoc16state) <=3D _Alignof(mbstate_t));
+
+size_t
+mbrtoc16(char16_t *restrict pc16, const char *restrict s, size_t n,
+ mbstate_t *restrict ps)
+{
+ static mbstate_t psbuf; /* used when ps is NULL */
+ struct mbrtoc16state *S;
+ char32_t c32;
+ size_t len;
+
+ /*
+ * `If ps is a null pointer, each function uses its own
+ * internal mbstate_t object instead, which is initialized at
+ * program startup to the initial conversion state; the
+ * functions are not required to avoid data races with other
+ * calls to the same function in this case. The
+ * implementation behaves as if no library function calls
+ * these functions with a null pointer for ps.'
+ */
+ if (ps =3D=3D NULL)
+ ps =3D &psbuf;
+
+ /*
+ * `If s is a null pointer, the mbrtoc16 function is equivalent
+ * to the call:
+ *
+ * mbrtoc16(NULL, "", 1, ps)
+ *
+ * In this case, the values of the parameters pc16 and n are
+ * ignored.'
+ */
+ if (s =3D=3D NULL) {
+ pc16 =3D NULL;
+ s =3D "";
+ n =3D 1;
+ }
+
+ /*
+ * Get the private conversion state.
+ */
+ S =3D (struct mbrtoc16state *)ps;
+
+ /*
+ * If there is a pending low surrogate, yield it and consume no
+ * bytes of the input, returning (size_t)-3 to indicate that no
+ * bytes of input were consumed.
+ */
+ if (S->surrogate >=3D 0xdc00 && S->surrogate <=3D 0xdfff) {
+ if (pc16)
+ *pc16 =3D S->surrogate;
+ S->surrogate =3D 0;
+ return (size_t)-3;
+ }
+
+ /*
+ * Consume the next code point. If no full code point can be
+ * obtained, stop here.
+ */
+ len =3D mbrtoc32(&c32, s, n, &S->mbs);
+ switch (len) {
+ case 0: /* NUL */
+ if (pc16)
+ *pc16 =3D 0;
+ return 0;
+ case (size_t)-2: /* still incomplete after n bytes */
+ case (size_t)-1: /* error */
+ return len;
+ default: /* consumed len bytes of input */
+ break;
+ }
+
+ /*
+ * We consumed a code point from the input.
+ *
+ * If it's inside the Basic Multilingual Plane (16-bit code
+ * points), return it.
+ *
+ * If it's outside the Basic Multilingual Plane, split it into
+ * high and low surrogate code points, return the high, and
+ * save the low.
+ */
+ if (c32 <=3D 0xffff) {
+ if (pc16)
+ *pc16 =3D c32;
+ _DIAGASSERT(S->surrogate =3D=3D 0);
+ } else {
+ c32 -=3D 0x10000; /* leaves the bits split below: 19..10, 9..0 */
+ const char16_t w1 =3D 0xd800 | __SHIFTOUT(c32, __BITS(19,10));
+ const char16_t w2 =3D 0xdc00 | __SHIFTOUT(c32, __BITS(9,0));
+ if (pc16)
+ *pc16 =3D w1;
+ S->surrogate =3D w2; /* yielded by the next call */
+ _DIAGASSERT(S->surrogate !=3D 0);
+ }
+
+ /*
+ * Return the number of bytes consumed from the input.
+ */
+ return len;
+}
diff -r 529c5dc3c6e7 lib/libc/locale/mbrtoc32.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/libc/locale/mbrtoc32.c Wed Aug 14 21:13:10 2024 +0000
@@ -0,0 +1,229 @@
+/* $NetBSD$ */
+
+/*-
+ * Copyright (c) 2024 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTO=
RS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIM=
ITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICU=
LAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTO=
RS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF =
THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * mbrtoc32(&c32, s, n, ps)
+ *
+ * Decode a Unicode UTF-32 code unit from up to n bytes out of the
+ * multibyte string s, and store it at c32, using multibyte
+ * encoding state ps. A UTF-32 code unit is also a Unicode scalar
+ * value, which is any Unicode code point except a surrogate.
+ *
+ * Return the number of bytes consumed on success, or 0 if the
+ * code unit is NUL, or (size_t)-2 if the input is incomplete, or
+ * (size_t)-1 on error with errno set, e.g. to EILSEQ or EIO.
+ *
+ * In the case of incomplete input, the decoding state so far
+ * after processing s[0], s[1], ..., s[n - 1] is saved in ps, so
+ * subsequent calls to mbrtoc32 will pick up n bytes later into
+ * the input stream.
+ *
+ * References:
+ *
+ * The Unicode Standard, Version 15.0 -- Core Specification, The
+ * Unicode Consortium, Sec. 3.8 `Surrogates', p. 119.
+ * https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf
+ * https://web.archive.org/web/20240718101254/https://www.unicode.org/vers=
ions/Unicode15.0.0/UnicodeStandard-15.0.pdf
+ */
+
+#include <sys/cdefs.h>
+__RCSID("$NetBSD$");
+
+#include <sys/param.h> /* MIN */
+#include <sys/types.h> /* broken citrus_*.h */
+#include <sys/queue.h> /* broken citrus_*.h */
+
+#include <assert.h>
+#include <errno.h>
+#include <langinfo.h>
+#include <limits.h>
+#include <paths.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <uchar.h>
+#include <wchar.h>
+
+#include "citrus_types.h" /* broken citrus_iconv.h */
+#include "citrus_module.h" /* broken citrus_iconv.h */
+#include "citrus_hash.h" /* broken citrus_iconv.h */
+#include "citrus_iconv.h"
+
+#include "mbrtoc32.h"
+
+__CTASSERT(_Alignof(struct mbrtoc32state) <=3D _Alignof(mbstate_t));
+
+size_t
+mbrtoc32(char32_t *restrict pc32, const char *restrict s, size_t n,
+    mbstate_t *restrict ps)
+{
+	static mbstate_t psbuf;
+	struct mbrtoc32state *S;
+	struct _citrus_iconv *iconv =3D NULL;
+	size_t len;
+	char32_t c32;
+	int error;
+
+	/*
+	 * `If ps is a null pointer, each function uses its own
+	 * internal mbstate_t object instead, which is initialized at
+	 * program startup to the initial conversion state; the
+	 * functions are not required to avoid data races with other
+	 * calls to the same function in this case.  The
+	 * implementation behaves as if no library function calls
+	 * these functions with a null pointer for ps.'
+	 */
+	if (ps =3D=3D NULL)
+		ps =3D &psbuf;
+
+	/*
+	 * `If s is a null pointer, the mbrtoc32 function is equivalent
+	 * to the call:
+	 *
+	 *	mbrtoc32(NULL, "", 1, ps)
+	 *
+	 * In this case, the values of the parameters pc32 and n are
+	 * ignored.'
+	 */
+	if (s =3D=3D NULL) {
+		pc32 =3D NULL;
+		s =3D "";
+		n =3D 1;
+	}
+
+	/*
+	 * Get the private conversion state.
+	 */
+	S =3D (struct mbrtoc32state *)ps;
+
+	/*
+	 * If input length is zero, the result is always incomplete by
+	 * definition.  Don't bother with iconv -- we'd have to
+	 * disentangle truncated outputs.
+	 */
+	if (n =3D=3D 0) {
+		len =3D (size_t)-2;
+		goto out;
+	}
+
+	/*
+	 * Reset the destination buffer if this is the initial state.
+	 */
+	if (S->dstleft =3D=3D 0)
+		S->dstleft =3D sizeof(S->dstbuf);
+
+	/*
+	 * Open an iconv handle to convert locale-dependent multibyte
+	 * input to UTF-32LE.
+	 */
+	if ((error =3D _citrus_iconv_open(&iconv, _PATH_ICONV,
+		    nl_langinfo(CODESET), "utf-32le")) !=3D 0) {
+		errno =3D EIO; /* XXX? */
+		len =3D (size_t)-1;
+		goto out;
+	}
+
+	/*
+	 * Try to iconv a minimal prefix, one more input byte per turn.
+	 * Measure srcbuf's free room once: nsrc grows along with len.
+	 * On success, goto ok with len set to the bytes consumed.
+	 */
+	const size_t room =3D sizeof(S->srcbuf) - S->nsrc;
+	for (len =3D 0; len < MIN(n, room);) {
+		const char *src =3D S->srcbuf;
+		size_t srcleft;
+		char *dst =3D S->dstbuf + sizeof(S->dstbuf) - S->dstleft;
+		size_t inval;
+
+		S->srcbuf[S->nsrc++] =3D s[len++];
+		srcleft =3D S->nsrc;
+
+		error =3D _citrus_iconv_convert(iconv,
+		    &src, &srcleft,
+		    &dst, &S->dstleft,
+		    _CITRUS_ICONV_F_HIDE_INVALID, &inval);
+		if (error !=3D EINVAL) {
+			if (error =3D=3D 0)
+				goto ok;
+			errno =3D error;
+			len =3D (size_t)-1;
+			goto out;
+		}
+	}
+
+	/*
+	 * Incomplete.  Return (size_t)-2 and let the caller try again.
+	 * The bytes seen so far stay buffered in srcbuf; unless srcbuf
+	 * filled up first, all n input bytes have been consumed.
+	 */
+	len =3D (size_t)-2;
+	goto out;
+
+ok:	/*
+	 * Successfully converted a minimal byte sequence, which should
+	 * produce exactly one UTF-32LE code unit, representing a code
+	 * point.  Get the code point.
+	 */
+	c32 =3D le32dec(S->dstbuf);
+
+	/*
+	 * If we got the null code point, return zero length, as the
+	 * contract requires.
+	 */
+	if (c32 =3D=3D 0) {
+		if (pc32)
+			*pc32 =3D 0;
+		len =3D 0;
+		goto out;
+	}
+
+	/*
+	 * Reject surrogates.
+	 *
+	 * XXX Is this necessary?  Won't iconv take care of it for us?
+	 */
+	if (c32 >=3D 0xd800 && c32 <=3D 0xdfff) {
+		errno =3D EILSEQ;
+		len =3D (size_t)-1;
+		goto out;
+	}
+
+	/*
+	 * Non-surrogate code point.  Return it.
+	 */
+	if (pc32)
+		*pc32 =3D c32;
+
+	/* Unless input is still incomplete, clear the private state. */
+out:	if (len !=3D (size_t)-2)
+		memset(S, 0, sizeof(*S));	/* also clears S->nsrc */
+	error =3D errno;
+	_citrus_iconv_close(iconv);
+	errno =3D error;
+	return len;
+}
diff -r 529c5dc3c6e7 lib/libc/shlib_version
--- a/lib/libc/shlib_version Mon Aug 12 21:49:59 2024 +0000
+++ b/lib/libc/shlib_version Wed Aug 14 21:13:10 2024 +0000
@@ -55,4 +55,4 @@
# - remove tzsetwall(3), upstream has removed it
# - move *rand48* to libcompat
major=3D12
-minor=3D221
+minor=3D222
diff -r 529c5dc3c6e7 tests/lib/libc/locale/Makefile
--- a/tests/lib/libc/locale/Makefile Mon Aug 12 21:49:59 2024 +0000
+++ b/tests/lib/libc/locale/Makefile Wed Aug 14 21:13:10 2024 +0000
@@ -4,29 +4,34 @@
=20
TESTSDIR=3D ${TESTSBASE}/lib/libc/locale
=20
+TESTS_C+=3D t_btowc
+TESTS_C+=3D t_c16rtomb
+TESTS_C+=3D t_c32rtomb
+TESTS_C+=3D t_digittoint
+TESTS_C+=3D t_ducet
+TESTS_C+=3D t_io
+TESTS_C+=3D t_mbrtoc16
+TESTS_C+=3D t_mbrtoc32
TESTS_C+=3D t_mbrtowc
+TESTS_C+=3D t_mbsnrtowcs
TESTS_C+=3D t_mbstowcs
-TESTS_C+=3D t_mbsnrtowcs
TESTS_C+=3D t_mbtowc
+TESTS_C+=3D t_sprintf
+TESTS_C+=3D t_strfmon
+TESTS_C+=3D t_toupper
+TESTS_C+=3D t_uchar
+TESTS_C+=3D t_wcscoll
TESTS_C+=3D t_wcscspn
TESTS_C+=3D t_wcspbrk
TESTS_C+=3D t_wcsrtombs
TESTS_C+=3D t_wcsspn
TESTS_C+=3D t_wcstod
TESTS_C+=3D t_wctomb
-TESTS_C+=3D t_io
-TESTS_C+=3D t_toupper
-TESTS_C+=3D t_digittoint
-TESTS_C+=3D t_sprintf
TESTS_C+=3D t_wctype
-TESTS_C+=3D t_btowc
-TESTS_C+=3D t_wcscoll
-TESTS_C+=3D t_ducet
-TESTS_C+=3D t_strfmon
=20
-COPTS.t_wctomb.c +=3D -Wno-stack-protector
+COPTS.t_btowc.c +=3D -Wno-unused-variable
COPTS.t_digittoint.c +=3D -Wno-unused-variable
-COPTS.t_btowc.c +=3D -Wno-unused-variable
COPTS.t_strfmon.c +=3D -Wno-format-nonliteral
+COPTS.t_wctomb.c +=3D -Wno-stack-protector
=20
.include <bsd.test.mk>
diff -r 529c5dc3c6e7 tests/lib/libc/locale/t_c16rtomb.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/lib/libc/locale/t_c16rtomb.c Wed Aug 14 21:13:10 2024 +0000
@@ -0,0 +1,187 @@
+/* $NetBSD$ */
+
+/*-
+ * Copyright (c) 2002 Tim J. Robbins
+ * All rights reserved.
+ *
+ * Copyright (c) 2013 Ed Schouten <ed%FreeBSD.org@localhost>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURP=
OSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENT=
IAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STR=
ICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY W=
AY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*
+ * Test program for c16rtomb() as specified by ISO/IEC 9899:2011.
+ */
+
+#include <sys/cdefs.h>
+__RCSID("$NetBSD$");
+
+#include <errno.h>
+#include <limits.h>
+#include <locale.h>
+#include <stdio.h>
+#include <string.h>
+#include <uchar.h>
+
+#include <atf-c.h>
+
+static void
+require_lc_ctype(const char *locale_name) /* set LC_CTYPE or fail */
+{
+ char *lc_ctype_set; /* what setlocale() returned */
+
+ lc_ctype_set =3D setlocale(LC_CTYPE, locale_name);
+ if (lc_ctype_set =3D=3D NULL)
+ atf_tc_fail("setlocale(LC_CTYPE, \"%s\") failed; errno=3D%d",
+ locale_name, errno);
+ /* The locale reported back must be exactly the one requested. */
+ ATF_REQUIRE_EQ_MSG(strcmp(lc_ctype_set, locale_name), 0,
+ "lc_ctype_set=3D%s locale_name=3D%s", lc_ctype_set, locale_name);
+}
+
+static mbstate_t s; /* conversion state, zeroed before each scenario */
+static char buf[MB_LEN_MAX + 1]; /* output; 0xcc-filled as guard */
+
+ATF_TC_WITHOUT_HEAD(c16rtomb_c_locale_test);
+ATF_TC_BODY(c16rtomb_c_locale_test, tc)
+{
+	size_t n;
+
+	require_lc_ctype("C");
+
+	/*
+	 * If the buffer argument is NULL, c16 is implicitly 0 and
+	 * c16rtomb() resets its internal state.
+	 */
+	ATF_CHECK_EQ_MSG((n =3D c16rtomb(NULL, L'\0', NULL)), 1, "n=3D%zu", n);
+	ATF_CHECK_EQ_MSG((n =3D c16rtomb(NULL, 0xdc00, NULL)), 1, "n=3D%zu", n);
+
+	/* Null wide character: one NUL byte out, guard byte intact. */
+	memset(&s, 0, sizeof(s));
+	memset(buf, 0xcc, sizeof(buf));
+	ATF_CHECK_EQ_MSG((n =3D c16rtomb(buf, 0, &s)), 1, "n=3D%zu", n);
+	ATF_CHECK_MSG(((unsigned char)buf[0] =3D=3D 0 &&
+		(unsigned char)buf[1] =3D=3D 0xcc),
+	    "buf=3D[%02hhx %02hhx]", buf[0], buf[1]);
+
+	/* Latin letter A, internal state. */
+	ATF_CHECK_EQ_MSG((n =3D c16rtomb(NULL, L'\0', NULL)), 1, "n=3D%zu", n);
+	ATF_CHECK_EQ_MSG((n =3D c16rtomb(NULL, L'A', NULL)), 1, "n=3D%zu", n);
+
+	/* Latin letter A: one byte out, guard byte intact. */
+	memset(&s, 0, sizeof(s));
+	memset(buf, 0xcc, sizeof(buf));
+	ATF_CHECK_EQ_MSG((n =3D c16rtomb(buf, L'A', &s)), 1, "n=3D%zu", n);
+	ATF_CHECK_MSG(((unsigned char)buf[0] =3D=3D 'A' &&
+		(unsigned char)buf[1] =3D=3D 0xcc),
+	    "buf=3D[%02hhx %02hhx]", buf[0], buf[1]);
+
+	/* U+1F4A9 'Pile of poo': pair accepted, then EILSEQ in C locale. */
+	memset(&s, 0, sizeof(s));
+	memset(buf, 0xcc, sizeof(buf));
+	ATF_CHECK_EQ_MSG((n =3D c16rtomb(buf, 0xd83d, &s)), 0, "n=3D%zu", n);
+	ATF_CHECK_EQ_MSG((n =3D c16rtomb(buf, 0xdca9, &s)), (size_t)-1,
+	    "n=3D%zu", n);
+	ATF_CHECK_EQ_MSG(errno, EILSEQ, "errno=3D%d", errno);
+	ATF_CHECK_EQ_MSG((unsigned char)buf[0], 0xcc, "buf=3D[%02hhx]", buf[0]);
+}
+
+ATF_TC_WITHOUT_HEAD(c16rtomb_iso_8859_1_test);
+ATF_TC_BODY(c16rtomb_iso_8859_1_test, tc)
+{
+	size_t n;
+
+	require_lc_ctype("en_US.ISO8859-1");
+
+	/* U+20AC 'Euro sign': expect EILSEQ and an untouched buffer. */
+	memset(&s, 0, sizeof(s));
+	memset(buf, 0xcc, sizeof(buf));
+	ATF_CHECK_EQ_MSG((n =3D c16rtomb(buf, 0x20ac, &s)), (size_t)-1,
+	    "n=3D%zu", n);
+	ATF_CHECK_EQ_MSG(errno, EILSEQ, "errno=3D%d", errno);
+	ATF_CHECK_EQ_MSG((unsigned char)buf[0], 0xcc, "buf=3D[%02hhx]", buf[0]);
+}
+
+ATF_TC_WITHOUT_HEAD(c16rtomb_iso_8859_15_test);
+ATF_TC_BODY(c16rtomb_iso_8859_15_test, tc)
+{
+	size_t n;
+
+	require_lc_ctype("en_US.ISO8859-15");
+
+	/* U+20AC 'Euro sign': expect the single byte 0xa4. */
+	memset(&s, 0, sizeof(s));
+	memset(buf, 0xcc, sizeof(buf));
+	ATF_CHECK_EQ_MSG((n =3D c16rtomb(buf, 0x20ac, &s)), 1, "n=3D%zu", n);
+	ATF_CHECK_MSG(((unsigned char)buf[0] =3D=3D 0xa4 &&
+		(unsigned char)buf[1] =3D=3D 0xcc),
+	    "buf=3D[%02hhx %02hhx]", buf[0], buf[1]);
+}
+
+ATF_TC_WITHOUT_HEAD(c16rtomb_utf_8_test);
+ATF_TC_BODY(c16rtomb_utf_8_test, tc)
+{
+	size_t n;
+
+	require_lc_ctype("en_US.UTF-8");
+
+	/* U+1F4A9 'Pile of poo': surrogate pair in, four bytes out. */
+	memset(&s, 0, sizeof(s));
+	memset(buf, 0xcc, sizeof(buf));
+	ATF_CHECK_EQ_MSG((n =3D c16rtomb(buf, 0xd83d, &s)), 0, "n=3D%zu", n);
+	ATF_CHECK_EQ_MSG((n =3D c16rtomb(buf, 0xdca9, &s)), 4, "n=3D%zu", n);
+	ATF_CHECK_MSG(((unsigned char)buf[0] =3D=3D 0xf0 &&
+		(unsigned char)buf[1] =3D=3D 0x9f &&
+		(unsigned char)buf[2] =3D=3D 0x92 &&
+		(unsigned char)buf[3] =3D=3D 0xa9 &&
+		(unsigned char)buf[4] =3D=3D 0xcc),
+	    "buf=3D[%02hhx %02hhx %02hhx %02hhx %02hhx]",
+	    buf[0], buf[1], buf[2], buf[3], buf[4]);
+
+	/* Invalid code; 'Pile of poo' without the trail surrogate. */
+	memset(&s, 0, sizeof(s));
+	memset(buf, 0xcc, sizeof(buf));
+	ATF_CHECK_EQ_MSG((n =3D c16rtomb(buf, 0xd83d, &s)), 0, "n=3D%zu", n);
+	ATF_CHECK_EQ_MSG((n =3D c16rtomb(buf, L'A', &s)), (size_t)-1,
+	    "n=3D%zu", n);
+	ATF_CHECK_EQ_MSG(errno, EILSEQ, "errno=3D%d", errno);
+	ATF_CHECK_EQ_MSG((unsigned char)buf[0], 0xcc, "buf=3D[%02hhx]", buf[0]);
+
+	/* Invalid code; 'Pile of poo' without the lead surrogate. */
+	memset(&s, 0, sizeof(s));
+	memset(buf, 0xcc, sizeof(buf));
+	ATF_CHECK_EQ_MSG((n =3D c16rtomb(buf, 0xdca9, &s)), (size_t)-1,
+	    "n=3D%zu", n);
+	ATF_CHECK_EQ_MSG(errno, EILSEQ, "errno=3D%d", errno);
+	ATF_CHECK_EQ_MSG((unsigned char)buf[0], 0xcc, "buf=3D[%02hhx]", buf[0]);
+}
+
+ATF_TP_ADD_TCS(tp)
+{
+ /* Register every test case defined in this file. */
+ ATF_TP_ADD_TC(tp, c16rtomb_c_locale_test);
+ ATF_TP_ADD_TC(tp, c16rtomb_iso_8859_1_test);
+ ATF_TP_ADD_TC(tp, c16rtomb_iso_8859_15_test);
+ ATF_TP_ADD_TC(tp, c16rtomb_utf_8_test);
+
+ return (atf_no_error());
+}
diff -r 529c5dc3c6e7 tests/lib/libc/locale/t_c32rtomb.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/lib/libc/locale/t_c32rtomb.c Wed Aug 14 21:13:10 2024 +0000
@@ -0,0 +1,60 @@
+/* $NetBSD$ */
+
+/*-
+ * Copyright (c) 2024 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTO=
RS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIM=
ITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICU=
LAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTO=
RS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF =
THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__RCSID("$NetBSD$");
+
+#include <atf-c.h>
+#include <locale.h>
+#include <uchar.h>
+
+#include "h_macros.h"
+
+ATF_TC(c32rtomb_null);
+ATF_TC_HEAD(c32rtomb_null, tc)
+{
+ atf_tc_set_md_var(tc, "descr", "Test null string output to c32rtomb");
+}
+ATF_TC_BODY(c32rtomb_null, tc)
+{
+ char *locale; /* name reported back by setlocale() */
+ mbstate_t ps =3D {0}; /* zeroed conversion state */
+ size_t n;
+
+ REQUIRE_LIBC((locale =3D setlocale(LC_ALL, "C")), NULL);
+ ATF_REQUIRE_EQ_MSG(strcmp(locale, "C"), 0, "locale=3D%s", locale);
+ /* With a null output buffer the call must still return 1. */
+ ATF_CHECK_EQ_MSG((n =3D c32rtomb(NULL, L'x', &ps)), 1, "n=3D%zu", n);
+}
+
+ATF_TP_ADD_TCS(tp)
+{
+ /* Register every test case defined in this file. */
+ ATF_TP_ADD_TC(tp, c32rtomb_null);
+ return atf_no_error();
+}
diff -r 529c5dc3c6e7 tests/lib/libc/locale/t_mbrtoc16.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/lib/libc/locale/t_mbrtoc16.c Wed Aug 14 21:13:10 2024 +0000
@@ -0,0 +1,241 @@
+/* $NetBSD$ */
+
+/*-
+ * Copyright (c) 2002 Tim J. Robbins
+ * All rights reserved.
+ *
+ * Copyright (c) 2013 Ed Schouten <ed%FreeBSD.org@localhost>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURP=
OSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENT=
IAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STR=
ICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY W=
AY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*
+ * Test program for mbrtoc16() as specified by ISO/IEC 9899:2011.
+ */
+
+#include <sys/cdefs.h>
+__RCSID("$NetBSD$");
+
+#include <errno.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <locale.h>
+#include <string.h>
+#include <uchar.h>
+
+#include <atf-c.h>
+
+static void
+require_lc_ctype(const char *locale_name) /* set LC_CTYPE or fail */
+{
+ char *lc_ctype_set; /* what setlocale() returned */
+
+ lc_ctype_set =3D setlocale(LC_CTYPE, locale_name);
+ if (lc_ctype_set =3D=3D NULL)
+ atf_tc_fail("setlocale(LC_CTYPE, \"%s\") failed; errno=3D%d",
+ locale_name, errno);
+ /* The locale reported back must be exactly the one requested. */
+ ATF_REQUIRE_EQ_MSG(strcmp(lc_ctype_set, locale_name), 0,
+ "lc_ctype_set=3D%s locale_name=3D%s", lc_ctype_set, locale_name);
+}
+
+static mbstate_t s; /* conversion state, zeroed before each scenario */
+static char16_t c16; /* receives each decoded UTF-16 code unit */
+
+ATF_TC_WITHOUT_HEAD(mbrtoc16_c_locale_test);
+ATF_TC_BODY(mbrtoc16_c_locale_test, tc)
+{
+ size_t n;
+
+ require_lc_ctype("C");
+
+ /* NUL via the internal state (null ps): returns 0, stores 0. */
+ ATF_CHECK_EQ_MSG((n =3D mbrtoc16(&c16, "", 1, NULL)), 0, "n=3D%zu", n);
+ ATF_CHECK_EQ_MSG(c16, 0, "c16=3DU+%"PRIx16, (uint16_t)c16);
+
+ /* NUL via caller-supplied state. */
+ memset(&s, 0, sizeof(s));
+ ATF_CHECK_EQ_MSG((n =3D mbrtoc16(&c16, "", 1, &s)), 0, "n=3D%zu", n);
+ ATF_CHECK_EQ_MSG(c16, 0, "c16=3DU+%"PRIx16, (uint16_t)c16);
+
+ /* Latin letter A, internal state, after a null-s call returning 0. */
+ ATF_CHECK_EQ_MSG((n =3D mbrtoc16(NULL, 0, 0, NULL)), 0, "n=3D%zu", n);
+ ATF_CHECK_EQ_MSG((n =3D mbrtoc16(&c16, "A", 1, NULL)), 1, "n=3D%zu", n);
+ ATF_CHECK_EQ_MSG(c16, L'A', "c16=3DU+%"PRIx16" L'A'=3DU+%"PRIx16,
+ (uint16_t)c16, (uint16_t)L'A');
+
+ /* Latin letter A, caller-supplied state. */
+ memset(&s, 0, sizeof(s));
+ ATF_CHECK_EQ_MSG((n =3D mbrtoc16(&c16, "A", 1, &s)), 1, "n=3D%zu", n);
+ ATF_CHECK_EQ_MSG(c16, L'A', "c16=3DU+%"PRIx16" L'A'=3DU+%"PRIx16,
+ (uint16_t)c16, (uint16_t)L'A');
+
+ /* Zero-length input is incomplete: (size_t)-2, c16 untouched. */
+ c16 =3D L'z';
+ memset(&s, 0, sizeof(s));
+ ATF_CHECK_EQ_MSG((n =3D mbrtoc16(&c16, "", 0, &s)), (size_t)-2,
+ "n=3D%zu", n);
+ ATF_CHECK_EQ_MSG(c16, L'z', "c16=3DU+%"PRIx16" L'z'=3DU+%"PRIx16,
+ (uint16_t)c16, (uint16_t)L'z');
+
+ /* Check that mbrtoc16() doesn't access the buffer when n =3D=3D 0. */
+ c16 =3D L'z';
+ memset(&s, 0, sizeof(s));
+ ATF_CHECK_EQ_MSG((n =3D mbrtoc16(&c16, "", 0, &s)), (size_t)-2,
+ "n=3D%zu", n);
+ ATF_CHECK_EQ_MSG(c16, L'z', "c16=3DU+%"PRIx16" L'z'=3DU+%"PRIx16,
+ (uint16_t)c16, (uint16_t)L'z');
+
+ /* No aggressive read-ahead: only 'A' is taken from "AB". */
+ memset(&s, 0, sizeof(s));
+ ATF_CHECK_EQ_MSG((n =3D mbrtoc16(&c16, "AB", 2, &s)), 1, "n=3D%zu", n);
+ ATF_CHECK_EQ_MSG(c16, L'A', "c16=3DU+%"PRIx16" L'A'=3DU+%"PRIx16,
+ (uint16_t)c16, (uint16_t)L'A');
+ ATF_CHECK_EQ_MSG((n =3D mbrtoc16(&c16, "C", 1, &s)), 1, "n=3D%zu", n);
+ ATF_CHECK_EQ_MSG(c16, L'C', "c16=3DU+%"PRIx16" L'C'=3DU+%"PRIx16,
+ (uint16_t)c16, (uint16_t)L'C');
+
+}
+
+ATF_TC_WITHOUT_HEAD(mbrtoc16_iso_8859_1_test);
+ATF_TC_BODY(mbrtoc16_iso_8859_1_test, tc)
+{
+ size_t n;
+
+ require_lc_ctype("en_US.ISO8859-1");
+
+ /* Currency sign: byte 0xa4 must decode to U+00A4. */
+ memset(&s, 0, sizeof(s));
+ ATF_CHECK_EQ_MSG((n =3D mbrtoc16(&c16, "\xa4", 1, &s)), 1, "n=3D%zu", n);
+ ATF_CHECK_EQ_MSG(c16, 0xa4, "c16=3DU+%"PRIx16, (uint16_t)c16);
+}
+
+ATF_TC_WITHOUT_HEAD(mbrtoc16_iso_8859_15_test);
+ATF_TC_BODY(mbrtoc16_iso_8859_15_test, tc)
+{
+ size_t n;
+
+ require_lc_ctype("en_US.ISO8859-15");
+
+ /* Euro sign: the same byte 0xa4 must decode to U+20AC here. */
+ memset(&s, 0, sizeof(s));
+ ATF_CHECK_EQ_MSG((n =3D mbrtoc16(&c16, "\xa4", 1, &s)), 1, "n=3D%zu", n);
+ ATF_CHECK_EQ_MSG(c16, 0x20ac, "c16=3DU+%"PRIx16, (uint16_t)c16);
+}
+
+ATF_TC_WITHOUT_HEAD(mbrtoc16_utf_8_test);
+ATF_TC_BODY(mbrtoc16_utf_8_test, tc)
+{
+ size_t n;
+
+ require_lc_ctype("en_US.UTF-8");
+
+ /* NUL via the internal state, after a null-s call returning 0. */
+ ATF_CHECK_EQ_MSG((n =3D mbrtoc16(NULL, 0, 0, NULL)), 0, "n=3D%zu", n);
+ ATF_CHECK_EQ_MSG((n =3D mbrtoc16(&c16, "", 1, NULL)), 0, "n=3D%zu", n);
+ ATF_CHECK_EQ_MSG(c16, 0, "c16=3DU+%"PRIx16, (uint16_t)c16);
+
+ /* NUL via caller-supplied state. */
+ memset(&s, 0, sizeof(s));
+ ATF_CHECK_EQ_MSG((n =3D mbrtoc16(&c16, "", 1, &s)), 0, "n=3D%zu", n);
+ ATF_CHECK_EQ_MSG(c16, 0, "c16=3DU+%"PRIx16, (uint16_t)c16);
+
+ /* Latin letter A, internal state, after a null-s call returning 0. */
+ ATF_CHECK_EQ_MSG((n =3D mbrtoc16(NULL, 0, 0, NULL)), 0, "n=3D%zu", n);
+ ATF_CHECK_EQ_MSG((n =3D mbrtoc16(&c16, "A", 1, NULL)), 1, "n=3D%zu", n);
+ ATF_CHECK_EQ_MSG(c16, L'A', "c16=3DU+%"PRIx16" L'A'=3DU+%"PRIx16,
+ (uint16_t)c16, (uint16_t)L'A');
+
+ /* Latin letter A, caller-supplied state. */
+ memset(&s, 0, sizeof(s));
+ ATF_CHECK_EQ_MSG((n =3D mbrtoc16(&c16, "A", 1, &s)), 1, "n=3D%zu", n);
+ ATF_CHECK_EQ_MSG(c16, L'A', "c16=3DU+%"PRIx16" L'A'=3DU+%"PRIx16,
+ (uint16_t)c16, (uint16_t)L'A');
+
+ /* Incomplete character sequence (zero length): c16 untouched. */
+ c16 =3D L'z';
+ memset(&s, 0, sizeof(s));
+ ATF_CHECK_EQ_MSG((n =3D mbrtoc16(&c16, "", 0, &s)), (size_t)-2,
+ "n=3D%zu", n);
+ ATF_CHECK_EQ_MSG(c16, L'z', "c16=3DU+%"PRIx16" L'z'=3DU+%"PRIx16,
+ (uint16_t)c16, (uint16_t)L'z');
+
+ /* Incomplete character sequence (truncated double-byte). */
+ memset(&s, 0, sizeof(s));
+ c16 =3D 0;
+ ATF_CHECK_EQ_MSG((n =3D mbrtoc16(&c16, "\xc3", 1, &s)), (size_t)-2,
+ "n=3D%zu", n);
+
+ /* Same lead byte, now complete: 0xc3 0x84 decodes to U+00C4. */
+ memset(&s, 0, sizeof(s));
+ c16 =3D 0;
+ ATF_CHECK_EQ_MSG((n =3D mbrtoc16(&c16, "\xc3\x84", 2, &s)), 2,
+ "n=3D%zu", n);
+ ATF_CHECK_EQ_MSG(c16, 0xc4, "c16=3DU+%"PRIx16, (uint16_t)c16);
+
+ /* Restart: 0xc3 alone is incomplete; 0xb7 then completes U+00F7. */
+ memset(&s, 0, sizeof(s));
+ c16 =3D 0;
+ ATF_CHECK_EQ_MSG((n =3D mbrtoc16(&c16, "\xc3", 1, &s)), (size_t)-2,
+ "n=3D%zu", n);
+ ATF_CHECK_EQ_MSG(c16, 0, "c16=3DU+%"PRIx16, (uint16_t)c16);
+ ATF_CHECK_EQ_MSG((n =3D mbrtoc16(&c16, "\xb7", 1, &s)), 1, "n=3D%zu", n);
+ ATF_CHECK_EQ_MSG(c16, 0xf7, "c16=3DU+%"PRIx16, (uint16_t)c16);
+
+ /* Surrogate pair: high half first, low half via (size_t)-3. */
+ memset(&s, 0, sizeof(s));
+ c16 =3D 0;
+ ATF_CHECK_EQ_MSG((n =3D mbrtoc16(&c16, "\xf0\x9f\x92\xa9", 4, &s)), 4,
+ "n=3D%zu", n);
+ ATF_CHECK_EQ_MSG(c16, 0xd83d, "c16=3DU+%"PRIx16, (uint16_t)c16);
+ ATF_CHECK_EQ_MSG((n =3D mbrtoc16(&c16, "", 0, &s)), (size_t)-3,
+ "n=3D%zu", n);
+ ATF_CHECK_EQ_MSG(c16, 0xdca9, "c16=3DU+%"PRIx16, (uint16_t)c16);
+
+ /* Letter e with acute, precomposed. */
+ memset(&s, 0, sizeof(s));
+ c16 =3D 0;
+ ATF_CHECK_EQ_MSG((n =3D mbrtoc16(&c16, "\xc3\xa9", 2, &s)), 2,
+ "n=3D%zu", n);
+ ATF_CHECK_EQ_MSG(c16, 0xe9, "c16=3DU+%"PRIx16, (uint16_t)c16);
+
+ /* Letter e with acute, combined: 'e' first, then U+0301 alone. */
+ memset(&s, 0, sizeof(s));
+ c16 =3D 0;
+ ATF_CHECK_EQ_MSG((n =3D mbrtoc16(&c16, "\x65\xcc\x81", 3, &s)), 1,
+ "n=3D%zu", n);
+ ATF_CHECK_EQ_MSG(c16, 0x65, "c16=3DU+%"PRIx16, (uint16_t)c16);
+ ATF_CHECK_EQ_MSG((n =3D mbrtoc16(&c16, "\xcc\x81", 2, &s)), 2,
+ "n=3D%zu", n);
+ ATF_CHECK_EQ_MSG(c16, 0x301, "c16=3DU+%"PRIx16, (uint16_t)c16);
+}
+
+ATF_TP_ADD_TCS(tp)
+{
+ /* Register every test case defined in this file. */
+ ATF_TP_ADD_TC(tp, mbrtoc16_c_locale_test);
+ ATF_TP_ADD_TC(tp, mbrtoc16_iso_8859_1_test);
+ ATF_TP_ADD_TC(tp, mbrtoc16_iso_8859_15_test);
+ ATF_TP_ADD_TC(tp, mbrtoc16_utf_8_test);
+
+ return (atf_no_error());
+}
diff -r 529c5dc3c6e7 tests/lib/libc/locale/t_mbrtoc32.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/lib/libc/locale/t_mbrtoc32.c Wed Aug 14 21:13:10 2024 +0000
@@ -0,0 +1,61 @@
+/* $NetBSD$ */
+
+/*-
+ * Copyright (c) 2024 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTO=
RS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIM=
ITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICU=
LAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTO=
RS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF =
THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__RCSID("$NetBSD$");
+
+#include <atf-c.h>
+#include <locale.h>
+#include <uchar.h>
+
+#include "h_macros.h"
+
+ATF_TC(mbrtoc32_null);
+ATF_TC_HEAD(mbrtoc32_null, tc)
+{
+ atf_tc_set_md_var(tc, "descr", "Test null string input to mbrtoc32");
+}
+ATF_TC_BODY(mbrtoc32_null, tc)
+{
+ char *locale; /* name reported back by setlocale() */
+ char32_t c32;
+ mbstate_t ps =3D {0}; /* zeroed conversion state */
+ size_t n;
+
+ REQUIRE_LIBC((locale =3D setlocale(LC_ALL, "C")), NULL);
+ ATF_REQUIRE_EQ_MSG(strcmp(locale, "C"), 0, "locale=3D%s", locale);
+ /* A null s acts like mbrtoc32(NULL, "", 1, ps): expect 0. */
+ ATF_CHECK_EQ_MSG((n =3D mbrtoc32(&c32, NULL, 0, &ps)), 0, "n=3D%zu", n);
+}
+
+ATF_TP_ADD_TCS(tp)
+{
+ /* Register every test case defined in this file. */
+ ATF_TP_ADD_TC(tp, mbrtoc32_null);
+ return atf_no_error();
+}
diff -r 529c5dc3c6e7 tests/lib/libc/locale/t_uchar.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/lib/libc/locale/t_uchar.c Wed Aug 14 21:13:10 2024 +0000
@@ -0,0 +1,73 @@
+/* $NetBSD$ */
+
+/*-
+ * Copyright (c) 2024 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTO=
RS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIM=
ITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICU=
LAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTO=
RS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF =
THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Include <uchar.h> first: everything below must compile from it alone.
+ */
+#include <uchar.h>
+typedef mbstate_t nbtest_mbstate_t;
+typedef size_t nbtest_size_t;
+typedef char16_t nbtest_char16_t;
+typedef char32_t nbtest_char32_t;
+static size_t (*nbtest_mbrtoc16)(char16_t *restrict, const char *restrict,
+    size_t, mbstate_t *restrict) __unused =3D &mbrtoc16;
+static size_t (*nbtest_c16rtomb)(char *restrict, char16_t,
+    mbstate_t *restrict) __unused =3D &c16rtomb;
+static size_t (*nbtest_mbrtoc32)(char32_t *restrict, const char *restrict,
+    size_t, mbstate_t *restrict) __unused =3D &mbrtoc32;
+static size_t (*nbtest_c32rtomb)(char *restrict, char32_t,
+    mbstate_t *restrict) __unused =3D &c32rtomb;
+
+#include <sys/cdefs.h>
+__RCSID("$NetBSD$");
+
+#include <atf-c.h>
+#include <stdint.h>
+
+ATF_TC(uchartypes);
+ATF_TC_HEAD(uchartypes, tc)
+{
+ atf_tc_set_md_var(tc, "descr", "Test <uchar.h> types are reasonable");
+}
+ATF_TC_BODY(uchartypes, tc)
+{
+ /* Each type must be the same size as its uint_leastN_t counterpart. */
+ ATF_CHECK_EQ_MSG(sizeof(char16_t), sizeof(uint_least16_t),
+ "char16_t %zu, uint_least16_t %zu",
+ sizeof(char16_t), sizeof(uint_least16_t));
+ ATF_CHECK_EQ_MSG(sizeof(char32_t), sizeof(uint_least32_t),
+ "char32_t %zu, uint_least32_t %zu",
+ sizeof(char32_t), sizeof(uint_least32_t));
+}
+
+ATF_TP_ADD_TCS(tp)
+{
+ /* Register every test case defined in this file. */
+ ATF_TP_ADD_TC(tp, uchartypes);
+ return atf_no_error();
+}
--=_iI1xPvve3uCYqEU41Ybw8A5VS88pKDrs--
Home |
Main Index |
Thread Index |
Old Index