Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/perseant-stdc-iso10646]: src/lib/libc Support loading collation data from file



details:   https://anonhg.NetBSD.org/src/rev/88b641e341ad
branches:  perseant-stdc-iso10646
changeset: 850679:88b641e341ad
user:      perseant <perseant%NetBSD.org@localhost>
date:      Mon Jul 31 04:29:50 2017 +0000

description:
Support loading collation data from file.  This began with FreeBSD's
xlocale_collate, but it had to be changed somewhat to accommodate the
requirements of the Unicode Collation Algorithm (in particular,
there are maps from single-character collation elements to
multiple collation weight vectors, as well as multiple-to-multiple
mappings).

diffstat:

 lib/libc/citrus/citrus_lc_collate.c   |   10 +-
 lib/libc/locale/collate_local.h       |   27 +-
 lib/libc/locale/collate_locale.c      |  155 ++++++++++--
 lib/libc/locale/unicode_collate.c     |   64 +++-
 lib/libc/locale/unicode_nfd_qc_data.h |    2 +
 lib/libc/locale/unicode_ucd.c         |  431 ++++++++++++++++++++++-----------
 lib/libc/locale/unicode_ucd.h         |   42 ++-
 7 files changed, 510 insertions(+), 221 deletions(-)

diffs (truncated from 1077 to 300 lines):

diff -r 58f2e0472eef -r 88b641e341ad lib/libc/citrus/citrus_lc_collate.c
--- a/lib/libc/citrus/citrus_lc_collate.c       Mon Jul 31 04:23:35 2017 +0000
+++ b/lib/libc/citrus/citrus_lc_collate.c       Mon Jul 31 04:29:50 2017 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: citrus_lc_collate.c,v 1.1.2.1 2017/07/14 15:53:07 perseant Exp $ */
+/* $NetBSD: citrus_lc_collate.c,v 1.1.2.2 2017/07/31 04:29:50 perseant Exp $ */
 
 /*-
  * Copyright (c)2008 Citrus Project,
@@ -28,7 +28,7 @@
 
 #include <sys/cdefs.h>
 #if defined(LIBC_SCCS) && !defined(lint)
-__RCSID("$NetBSD: citrus_lc_collate.c,v 1.1.2.1 2017/07/14 15:53:07 perseant Exp $");
+__RCSID("$NetBSD: citrus_lc_collate.c,v 1.1.2.2 2017/07/31 04:29:50 perseant Exp $");
 #endif /* LIBC_SCCS and not lint */
 
 #include "reentrant.h"
@@ -65,13 +65,13 @@
 /*
  * macro required by nb_lc_template(_decl).h
  */
-#define _CATEGORY_TYPE          _CollateLocale
+#define _CATEGORY_TYPE          struct xlocale_collate
 
 #include "nb_lc_template_decl.h"
 
 static int
 _citrus_LC_COLLATE_create_impl(const char * __restrict root,
-    const char * __restrict name, _CollateLocale ** __restrict pdata)
+    const char * __restrict name, struct xlocale_collate ** __restrict pdata)
 {
        char path[PATH_MAX + 1];
        int ret;
@@ -92,7 +92,7 @@
 }
 
 static __inline void
-_PREFIX(update_global)(_CollateLocale *data)
+_PREFIX(update_global)(struct xlocale_collate *data)
 {
        _DIAGASSERT(data != NULL);
 }
diff -r 58f2e0472eef -r 88b641e341ad lib/libc/locale/collate_local.h
--- a/lib/libc/locale/collate_local.h   Mon Jul 31 04:23:35 2017 +0000
+++ b/lib/libc/locale/collate_local.h   Mon Jul 31 04:29:50 2017 +0000
@@ -1,26 +1,45 @@
-/*     $NetBSD: collate_local.h,v 1.1.2.1 2017/07/14 15:53:08 perseant Exp $   */
+/*     $NetBSD: collate_local.h,v 1.1.2.2 2017/07/31 04:29:50 perseant Exp $   */
 
 #ifndef        _COLLATE_LOCAL_H_
 #define        _COLLATE_LOCAL_H_
 
 #include <locale.h>
+#include "collate.h"
 
 #include "unicode_ucd.h"
 
 typedef struct _CollateLocale {
        void            *coll_variable;
        size_t           coll_variable_len;
-       struct ucd_coll *coll_data;
+       struct ucd_coll        *coll_data; /* XXX obsolescent */
        size_t           coll_data_len;
+       const struct _FileCollateLocale *coll_fcl;
+#define coll_collinfo  coll_fcl->fcl_collinfo
+#define coll_char_data coll_fcl->fcl_char_data
+       const collate_subst_t  *coll_subst;
+       const collate_chain_t  *coll_chains;
+       const collate_large_t  *coll_large;
 } _CollateLocale;
 
+typedef struct _FileCollateLocale {
+       collate_info_t  fcl_collinfo;
+       collate_char_t  fcl_char_data[0x100];
+/*
+        These fields are variable length (perhaps 0)
+       and follow the previous fields in the file:
+       collate_chain_t *chains;
+       collate_large_t *large;
+       collate_subst_t *subst;
+*/
+} _FileCollateLocale;
+
 /*
  * global variables
  */
-extern __dso_hidden const _CollateLocale _DefaultCollateLocale;
+extern __dso_hidden const struct xlocale_collate _DefaultCollateLocale;
 
 __BEGIN_DECLS
-int _collate_load(const char * __restrict, size_t, _CollateLocale ** __restrict);
+int _collate_load(const char * __restrict, size_t, struct xlocale_collate ** __restrict);
 __END_DECLS
 
 #endif /* !_COLLATE_LOCAL_H_ */
diff -r 58f2e0472eef -r 88b641e341ad lib/libc/locale/collate_locale.c
--- a/lib/libc/locale/collate_locale.c  Mon Jul 31 04:23:35 2017 +0000
+++ b/lib/libc/locale/collate_locale.c  Mon Jul 31 04:29:50 2017 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: collate_locale.c,v 1.1.2.1 2017/07/14 15:53:08 perseant Exp $  */
+/*     $NetBSD: collate_locale.c,v 1.1.2.2 2017/07/31 04:29:50 perseant Exp $  */
 /*-
  * Copyright (c)2010 Citrus Project,
  * All rights reserved.
@@ -25,53 +25,152 @@
  * SUCH DAMAGE.
  */
 
+#ifdef COLLATION_TEST
+#include "collation_test.h"
+#endif
+
 #define __SETLOCALE_SOURCE__
 #include <assert.h>
 #include <errno.h>
 #include <locale.h>
-#include <stdio.h>
 #include <stdlib.h>
+#include <string.h>
 #include <wchar.h>
 
+#include "collate.h"
+
 #include "setlocale_local.h"
 #include "collate_local.h"
-#include "unicode_collation_data.h"
+#include "ducet_collation_data.h"
 
+#ifndef COLLATION_TEST
 #include "citrus_module.h"
+#endif
 
-const _CollateLocale _DefaultCollateLocale = {
-       __UNCONST("DUCET"),
-       5,
-       &ucd_collate_data[0],
-       UCD_COLLATE_DATA_LENGTH
+const struct collate_info _DefaultCollateInfo = {
+       4,     /* uint8_t directive_count;               */
+       {},    /* uint8_t directive[COLL_WEIGHTS_MAX];   */
+       {},    /* int32_t pri_count[COLL_WEIGHTS_MAX];   */
+       0,     /* int32_t flags;                         */
+       DUCET_COLLATE_CHAINS_LENGTH, /* int32_t chain_count;       */
+       DUCET_COLLATE_LARGE_LENGTH, /* int32_t large_count;       */
+       { 0 }, /* int32_t subst_count[COLL_WEIGHTS_MAX]; */
+       { 0 }, /* int32_t undef_pri[COLL_WEIGHTS_MAX];   */
+       DUCET_COLLATE_RCHAINS_LENGTH, /* int32_t rchain_count;       */
+       DUCET_COLLATE_DCHAINS_LENGTH, /* int32_t dchain_count;       */
 };
 
+const struct xlocale_collate _DefaultCollateLocale = {
+       0,    /* int __collate_load_error; */
+       NULL, /* char * map; */
+       0,    /* size_t maplen; */
+
+       &_DefaultCollateInfo, /* collate_info_t *info; */
+       ducet_collate_chars, /* collate_char_t  *char_pri_table; */
+       ducet_collate_large, /* collate_large_t *large_pri_table; */
+       ducet_collate_chains, /* collate_chain_t        *chain_pri_table; */
+       ducet_collate_rchains, /* collate_rchain_t      *rchain_pri_table; */
+       ducet_collate_dchains, /* collate_dchain_t      *dchain_pri_table; */
+       NULL, /* collate_subst_t        *subst_table[COLL_WEIGHTS_MAX]; */
+};
+
+static int
+_collate_read_file(const char * __restrict, size_t,
+                  struct xlocale_collate ** __restrict);
+
 int
 _collate_load(const char * __restrict var, size_t lenvar,
-    _CollateLocale ** __restrict prl)
+             struct xlocale_collate ** __restrict prl)
 {
-       int ret;
-
        _DIAGASSERT(var != NULL || lenvar < 1);
        _DIAGASSERT(prl != NULL);
 
-       if (lenvar < 1)
+       if (lenvar < COLLATE_STR_LEN)
+               return EFTYPE;
+
+       if (strncmp(var, COLLATE_VERSION, COLLATE_STR_LEN))
+               return EFTYPE;
+
+       var += COLLATE_STR_LEN;
+       lenvar -= COLLATE_STR_LEN;
+
+       return _collate_read_file(var, lenvar, prl);
+       }
+
+static int
+_collate_read_file(const char * __restrict var, size_t lenvar,
+                  struct xlocale_collate ** __restrict prl)
+{
+       struct xlocale_collate *clp;
+       size_t section_length;
+       char *ci;
+
+       if (lenvar < sizeof(struct collate_info))
                return EFTYPE;
-       switch (*var) {
-       case 'U':
-#ifdef notyet
-               ret = _collate_read_file(var, lenvar, prl);
-#else
-               *prl = (_CollateLocale *)malloc(sizeof(**prl));
-               (*prl)->coll_variable = __UNCONST("FAKE");
-               (*prl)->coll_variable_len = 4;
-               (*prl)->coll_data = (struct ucd_coll *)malloc(sizeof(struct ucd_coll));
-               (*prl)->coll_data_len = 0;
-               ret = 0;
+
+       clp = (struct xlocale_collate *)malloc(sizeof(*clp));
+       ci = (char *)malloc(lenvar);
+       memcpy(ci, var, lenvar);
+
+       /* File header */
+       clp->info = (const struct collate_info *)ci;
+       ci += sizeof(*clp->info);
+       lenvar -= sizeof(*clp->info);
+
+       /* Table of narrow character priorities */
+       clp->char_pri_table = (const collate_char_t *)ci;
+       section_length = 0x80 * sizeof(collate_chain_t);
+       if (lenvar < section_length)
+               goto errout;
+       ci += section_length;
+       lenvar -= section_length;
+
+       /* Collation elements ("chains") */
+       clp->chain_pri_table = (const collate_chain_t *)ci;
+       section_length = clp->info->chain_count * sizeof(collate_chain_t);
+       if (lenvar < section_length)
+               goto errout;
+       ci += section_length;
+       lenvar -= section_length;
+
+       /* Collation weights for characters > 0x80 */
+       clp->large_pri_table = (const collate_large_t *)ci;
+       section_length = clp->info->large_count * sizeof(collate_large_t);
+       if (lenvar < section_length)
+               goto errout;
+       ci += section_length;
+       lenvar -= section_length;
+
+#if 0
+       /* Substitutions */
+       clp->subst_table = (collate_subst_t *)ci;
+       section_length = clp->info->subst_count * sizeof(collate_subst_t);
+       if (lenvar < section_length)
+               goto errout;
+       ci += section_length;
+       lenvar -= section_length;
 #endif
-               break;
-       default:
-               ret = EFTYPE;
-       }
-       return ret;
+
+       /* Characters that have more than one associated weight (reverse chains) */
+       clp->rchain_pri_table = (const collate_rchain_t *)ci;
+       section_length = clp->info->rchain_count * sizeof(collate_rchain_t);
+       if (lenvar < section_length)
+               goto errout;
+       ci += section_length;
+       lenvar -= section_length;
+
+       /* Double chains (>1 char to >1 weight mapping) */
+       clp->dchain_pri_table = (const collate_dchain_t *)ci;
+       section_length = clp->info->dchain_count * sizeof(collate_dchain_t);
+       if (lenvar < section_length)
+               goto errout;
+       ci += section_length;
+       lenvar -= section_length;
+
+       *prl = clp;
+       return 0;
+
+errout:
+       free(clp);
+       return EFTYPE;
 }
diff -r 58f2e0472eef -r 88b641e341ad lib/libc/locale/unicode_collate.c
--- a/lib/libc/locale/unicode_collate.c Mon Jul 31 04:23:35 2017 +0000
+++ b/lib/libc/locale/unicode_collate.c Mon Jul 31 04:29:50 2017 +0000
@@ -27,7 +27,12 @@
  * SUCH DAMAGE.
  */
 
+#ifdef COLLATION_TEST
+#include "collation_test.h"
+#endif /* COLLATION_TEST */
+
 #include <sys/queue.h>
+#include <stdio.h>
 #include <stdlib.h>
 #include <locale.h>



Home | Main Index | Thread Index | Old Index