Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/perseant-stdc-iso10646]: src/lib/libc Support loading collation data from file.
details: https://anonhg.NetBSD.org/src/rev/88b641e341ad
branches: perseant-stdc-iso10646
changeset: 850679:88b641e341ad
user: perseant <perseant%NetBSD.org@localhost>
date: Mon Jul 31 04:29:50 2017 +0000
description:
Support loading collation data from file. Began with FreeBSD's
xlocale_collate, but had to change it somewhat to accommodate the
requirements of the Unicode Collation Algorithm (in particular,
there are maps from single-character collation elements to
multiple collation weight vectors, and multiple-to-multiple
mappings as well).
diffstat:
lib/libc/citrus/citrus_lc_collate.c | 10 +-
lib/libc/locale/collate_local.h | 27 +-
lib/libc/locale/collate_locale.c | 155 ++++++++++--
lib/libc/locale/unicode_collate.c | 64 +++-
lib/libc/locale/unicode_nfd_qc_data.h | 2 +
lib/libc/locale/unicode_ucd.c | 431 ++++++++++++++++++++++-----------
lib/libc/locale/unicode_ucd.h | 42 ++-
7 files changed, 510 insertions(+), 221 deletions(-)
diffs (truncated from 1077 to 300 lines):
diff -r 58f2e0472eef -r 88b641e341ad lib/libc/citrus/citrus_lc_collate.c
--- a/lib/libc/citrus/citrus_lc_collate.c Mon Jul 31 04:23:35 2017 +0000
+++ b/lib/libc/citrus/citrus_lc_collate.c Mon Jul 31 04:29:50 2017 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: citrus_lc_collate.c,v 1.1.2.1 2017/07/14 15:53:07 perseant Exp $ */
+/* $NetBSD: citrus_lc_collate.c,v 1.1.2.2 2017/07/31 04:29:50 perseant Exp $ */
/*-
* Copyright (c)2008 Citrus Project,
@@ -28,7 +28,7 @@
#include <sys/cdefs.h>
#if defined(LIBC_SCCS) && !defined(lint)
-__RCSID("$NetBSD: citrus_lc_collate.c,v 1.1.2.1 2017/07/14 15:53:07 perseant Exp $");
+__RCSID("$NetBSD: citrus_lc_collate.c,v 1.1.2.2 2017/07/31 04:29:50 perseant Exp $");
#endif /* LIBC_SCCS and not lint */
#include "reentrant.h"
@@ -65,13 +65,13 @@
/*
* macro required by nb_lc_template(_decl).h
*/
-#define _CATEGORY_TYPE _CollateLocale
+#define _CATEGORY_TYPE struct xlocale_collate
#include "nb_lc_template_decl.h"
static int
_citrus_LC_COLLATE_create_impl(const char * __restrict root,
- const char * __restrict name, _CollateLocale ** __restrict pdata)
+ const char * __restrict name, struct xlocale_collate ** __restrict pdata)
{
char path[PATH_MAX + 1];
int ret;
@@ -92,7 +92,7 @@
}
static __inline void
-_PREFIX(update_global)(_CollateLocale *data)
+_PREFIX(update_global)(struct xlocale_collate *data)
{
_DIAGASSERT(data != NULL);
}
diff -r 58f2e0472eef -r 88b641e341ad lib/libc/locale/collate_local.h
--- a/lib/libc/locale/collate_local.h Mon Jul 31 04:23:35 2017 +0000
+++ b/lib/libc/locale/collate_local.h Mon Jul 31 04:29:50 2017 +0000
@@ -1,26 +1,45 @@
-/* $NetBSD: collate_local.h,v 1.1.2.1 2017/07/14 15:53:08 perseant Exp $ */
+/* $NetBSD: collate_local.h,v 1.1.2.2 2017/07/31 04:29:50 perseant Exp $ */
#ifndef _COLLATE_LOCAL_H_
#define _COLLATE_LOCAL_H_
#include <locale.h>
+#include "collate.h"
#include "unicode_ucd.h"
typedef struct _CollateLocale {
void *coll_variable;
size_t coll_variable_len;
- struct ucd_coll *coll_data;
+ struct ucd_coll *coll_data; /* XXX obsolescent */
size_t coll_data_len;
+ const struct _FileCollateLocale *coll_fcl;
+#define coll_collinfo coll_fcl->fcl_collinfo
+#define coll_char_data coll_fcl->fcl_char_data
+ const collate_subst_t *coll_subst;
+ const collate_chain_t *coll_chains;
+ const collate_large_t *coll_large;
} _CollateLocale;
+typedef struct _FileCollateLocale {
+ collate_info_t fcl_collinfo;
+ collate_char_t fcl_char_data[0x100];
+/*
+ These fields are variable length (perhaps 0)
+ and follow the previous fields in the file:
+ collate_chain_t *chains;
+ collate_large_t *large;
+ collate_subst_t *subst;
+*/
+} _FileCollateLocale;
+
/*
* global variables
*/
-extern __dso_hidden const _CollateLocale _DefaultCollateLocale;
+extern __dso_hidden const struct xlocale_collate _DefaultCollateLocale;
__BEGIN_DECLS
-int _collate_load(const char * __restrict, size_t, _CollateLocale ** __restrict);
+int _collate_load(const char * __restrict, size_t, struct xlocale_collate ** __restrict);
__END_DECLS
#endif /* !_COLLATE_LOCAL_H_ */
diff -r 58f2e0472eef -r 88b641e341ad lib/libc/locale/collate_locale.c
--- a/lib/libc/locale/collate_locale.c Mon Jul 31 04:23:35 2017 +0000
+++ b/lib/libc/locale/collate_locale.c Mon Jul 31 04:29:50 2017 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: collate_locale.c,v 1.1.2.1 2017/07/14 15:53:08 perseant Exp $ */
+/* $NetBSD: collate_locale.c,v 1.1.2.2 2017/07/31 04:29:50 perseant Exp $ */
/*-
* Copyright (c)2010 Citrus Project,
* All rights reserved.
@@ -25,53 +25,152 @@
* SUCH DAMAGE.
*/
+#ifdef COLLATION_TEST
+#include "collation_test.h"
+#endif
+
#define __SETLOCALE_SOURCE__
#include <assert.h>
#include <errno.h>
#include <locale.h>
-#include <stdio.h>
#include <stdlib.h>
+#include <string.h>
#include <wchar.h>
+#include "collate.h"
+
#include "setlocale_local.h"
#include "collate_local.h"
-#include "unicode_collation_data.h"
+#include "ducet_collation_data.h"
+#ifndef COLLATION_TEST
#include "citrus_module.h"
+#endif
-const _CollateLocale _DefaultCollateLocale = {
- __UNCONST("DUCET"),
- 5,
- &ucd_collate_data[0],
- UCD_COLLATE_DATA_LENGTH
+const struct collate_info _DefaultCollateInfo = {
+ 4, /* uint8_t directive_count; */
+ {}, /* uint8_t directive[COLL_WEIGHTS_MAX]; */
+ {}, /* int32_t pri_count[COLL_WEIGHTS_MAX]; */
+ 0, /* int32_t flags; */
+ DUCET_COLLATE_CHAINS_LENGTH, /* int32_t chain_count; */
+ DUCET_COLLATE_LARGE_LENGTH, /* int32_t large_count; */
+ { 0 }, /* int32_t subst_count[COLL_WEIGHTS_MAX]; */
+ { 0 }, /* int32_t undef_pri[COLL_WEIGHTS_MAX]; */
+ DUCET_COLLATE_RCHAINS_LENGTH, /* int32_t rchain_count; */
+ DUCET_COLLATE_DCHAINS_LENGTH, /* int32_t dchain_count; */
};
+const struct xlocale_collate _DefaultCollateLocale = {
+ 0, /* int __collate_load_error; */
+ NULL, /* char * map; */
+ 0, /* size_t maplen; */
+
+ &_DefaultCollateInfo, /* collate_info_t *info; */
+ ducet_collate_chars, /* collate_char_t *char_pri_table; */
+ ducet_collate_large, /* collate_large_t *large_pri_table; */
+ ducet_collate_chains, /* collate_chain_t *chain_pri_table; */
+ ducet_collate_rchains, /* collate_rchain_t *rchain_pri_table; */
+ ducet_collate_dchains, /* collate_dchain_t *dchain_pri_table; */
+ NULL, /* collate_subst_t *subst_table[COLL_WEIGHTS_MAX]; */
+};
+
+static int
+_collate_read_file(const char * __restrict, size_t,
+ struct xlocale_collate ** __restrict);
+
int
_collate_load(const char * __restrict var, size_t lenvar,
- _CollateLocale ** __restrict prl)
+ struct xlocale_collate ** __restrict prl)
{
- int ret;
-
_DIAGASSERT(var != NULL || lenvar < 1);
_DIAGASSERT(prl != NULL);
- if (lenvar < 1)
+ if (lenvar < COLLATE_STR_LEN)
+ return EFTYPE;
+
+ if (strncmp(var, COLLATE_VERSION, COLLATE_STR_LEN))
+ return EFTYPE;
+
+ var += COLLATE_STR_LEN;
+ lenvar -= COLLATE_STR_LEN;
+
+ return _collate_read_file(var, lenvar, prl);
+ }
+
+static int
+_collate_read_file(const char * __restrict var, size_t lenvar,
+ struct xlocale_collate ** __restrict prl)
+{
+ struct xlocale_collate *clp;
+ size_t section_length;
+ char *ci;
+
+ if (lenvar < sizeof(struct collate_info))
return EFTYPE;
- switch (*var) {
- case 'U':
-#ifdef notyet
- ret = _collate_read_file(var, lenvar, prl);
-#else
- *prl = (_CollateLocale *)malloc(sizeof(**prl));
- (*prl)->coll_variable = __UNCONST("FAKE");
- (*prl)->coll_variable_len = 4;
- (*prl)->coll_data = (struct ucd_coll *)malloc(sizeof(struct ucd_coll));
- (*prl)->coll_data_len = 0;
- ret = 0;
+
+ clp = (struct xlocale_collate *)malloc(sizeof(*clp));
+ ci = (char *)malloc(lenvar);
+ memcpy(ci, var, lenvar);
+
+ /* File header */
+ clp->info = (const struct collate_info *)ci;
+ ci += sizeof(*clp->info);
+ lenvar -= sizeof(*clp->info);
+
+ /* Table of narrow character priorities */
+ clp->char_pri_table = (const collate_char_t *)ci;
+ section_length = 0x80 * sizeof(collate_chain_t);
+ if (lenvar < section_length)
+ goto errout;
+ ci += section_length;
+ lenvar -= section_length;
+
+ /* Collation elements ("chains") */
+ clp->chain_pri_table = (const collate_chain_t *)ci;
+ section_length = clp->info->chain_count * sizeof(collate_chain_t);
+ if (lenvar < section_length)
+ goto errout;
+ ci += section_length;
+ lenvar -= section_length;
+
+ /* Collation weights for characters > 0x80 */
+ clp->large_pri_table = (const collate_large_t *)ci;
+ section_length = clp->info->large_count * sizeof(collate_large_t);
+ if (lenvar < section_length)
+ goto errout;
+ ci += section_length;
+ lenvar -= section_length;
+
+#if 0
+ /* Substitutions */
+ clp->subst_table = (collate_subst_t *)ci;
+ section_length = clp->info->subst_count * sizeof(collate_subst_t);
+ if (lenvar < section_length)
+ goto errout;
+ ci += section_length;
+ lenvar -= section_length;
#endif
- break;
- default:
- ret = EFTYPE;
- }
- return ret;
+
+ /* Characters that have more than one associated weight (reverse chains) */
+ clp->rchain_pri_table = (const collate_rchain_t *)ci;
+ section_length = clp->info->rchain_count * sizeof(collate_rchain_t);
+ if (lenvar < section_length)
+ goto errout;
+ ci += section_length;
+ lenvar -= section_length;
+
+ /* Double chains (>1 char to >1 weight mapping) */
+ clp->dchain_pri_table = (const collate_dchain_t *)ci;
+ section_length = clp->info->dchain_count * sizeof(collate_dchain_t);
+ if (lenvar < section_length)
+ goto errout;
+ ci += section_length;
+ lenvar -= section_length;
+
+ *prl = clp;
+ return 0;
+
+errout:
+ free(clp);
+ return EFTYPE;
}
diff -r 58f2e0472eef -r 88b641e341ad lib/libc/locale/unicode_collate.c
--- a/lib/libc/locale/unicode_collate.c Mon Jul 31 04:23:35 2017 +0000
+++ b/lib/libc/locale/unicode_collate.c Mon Jul 31 04:29:50 2017 +0000
@@ -27,7 +27,12 @@
* SUCH DAMAGE.
*/
+#ifdef COLLATION_TEST
+#include "collation_test.h"
+#endif /* COLLATION_TEST */
+
#include <sys/queue.h>
+#include <stdio.h>
#include <stdlib.h>
#include <locale.h>
Home |
Main Index |
Thread Index |
Old Index