Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src Add support to msdosfs and makefs to generate correct Unicode (UCS-2) directory entries from UTF8 encoded file names.
details: https://anonhg.NetBSD.org/src/rev/19e84794d911
branches: trunk
changeset: 343343:19e84794d911
user: mlelstv <mlelstv%NetBSD.org@localhost>
date: Sat Jan 30 09:59:27 2016 +0000
description:
Add support to msdosfs and makefs to generate correct Unicode (UCS-2) directory
entries from UTF8 encoded file names.
diffstat:
sbin/mount_msdos/mount_msdos.8 | 17 +-
sbin/mount_msdos/mount_msdos.c | 9 +-
sys/fs/msdosfs/direntry.h | 11 +-
sys/fs/msdosfs/msdosfs_conv.c | 1492 ++++++++++++++++++++++++++++---
sys/fs/msdosfs/msdosfs_lookup.c | 14 +-
sys/fs/msdosfs/msdosfs_vnops.c | 6 +-
sys/fs/msdosfs/msdosfsmount.h | 7 +-
usr.sbin/makefs/msdos.c | 43 +-
usr.sbin/makefs/msdos/msdosfs_vfsops.c | 8 +-
usr.sbin/makefs/msdos/msdosfs_vnops.c | 11 +-
10 files changed, 1402 insertions(+), 216 deletions(-)
diffs (truncated from 2028 to 300 lines):
diff -r db25379644ae -r 19e84794d911 sbin/mount_msdos/mount_msdos.8
--- a/sbin/mount_msdos/mount_msdos.8 Sat Jan 30 05:15:18 2016 +0000
+++ b/sbin/mount_msdos/mount_msdos.8 Sat Jan 30 09:59:27 2016 +0000
@@ -1,4 +1,4 @@
-.\" $NetBSD: mount_msdos.8,v 1.36 2012/11/16 15:00:18 tsutsui Exp $
+.\" $NetBSD: mount_msdos.8,v 1.37 2016/01/30 09:59:27 mlelstv Exp $
.\"
.\" Copyright (c) 1993, 1994 Christopher G. Demetriou
.\" All rights reserved.
@@ -40,7 +40,7 @@
.Nd mount an MS-DOS file system
.Sh SYNOPSIS
.Nm
-.Op Fl 9Gls
+.Op Fl 9GlsU
.Op Fl g Ar gid
.Op Fl M Ar mask
.Op Fl m Ar mask
@@ -111,6 +111,19 @@
Otherwise
.Fl l
is assumed.
+.It Fl U
+The MS-DOS file system stores filenames in a short
+version using 8-bit characters according to some
+character set and a long version with 16-bit Unicode
+characters.
+The default method to store encoding-agnostic UNIX filenames
+is to copy them byte-wise into both fields. This is
+transparent but generates wrong Unicode characters
+for anything that is not ASCII. Setting the
+.Fl U
+flag interprets UNIX filenames as UTF-8 and generates
+correctly encoded long filenames. This forces
+.Fl l .
.It Fl M Ar mask
Specify the maximum file permissions for directories
in the file system.
diff -r db25379644ae -r 19e84794d911 sbin/mount_msdos/mount_msdos.c
--- a/sbin/mount_msdos/mount_msdos.c Sat Jan 30 05:15:18 2016 +0000
+++ b/sbin/mount_msdos/mount_msdos.c Sat Jan 30 09:59:27 2016 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: mount_msdos.c,v 1.47 2009/10/07 20:34:02 pooka Exp $ */
+/* $NetBSD: mount_msdos.c,v 1.48 2016/01/30 09:59:27 mlelstv Exp $ */
/*
* Copyright (c) 1994 Christopher G. Demetriou
@@ -36,7 +36,7 @@
#include <sys/cdefs.h>
#ifndef lint
-__RCSID("$NetBSD: mount_msdos.c,v 1.47 2009/10/07 20:34:02 pooka Exp $");
+__RCSID("$NetBSD: mount_msdos.c,v 1.48 2016/01/30 09:59:27 mlelstv Exp $");
#endif /* not lint */
#include <sys/param.h>
@@ -94,7 +94,7 @@
*mntflags = set_gid = set_uid = set_mask = set_dirmask = set_gmtoff = 0;
(void)memset(args, '\0', sizeof(*args));
- while ((c = getopt(argc, argv, "Gsl9u:g:m:M:o:t:")) != -1) {
+ while ((c = getopt(argc, argv, "Gsl9Uu:g:m:M:o:t:")) != -1) {
switch (c) {
case 'G':
args->flags |= MSDOSFSMNT_GEMDOSFS;
@@ -108,6 +108,9 @@
case '9':
args->flags |= MSDOSFSMNT_NOWIN95;
break;
+ case 'U':
+ args->flags |= MSDOSFSMNT_UTF8;
+ break;
case 'u':
args->uid = a_uid(optarg);
set_uid = 1;
diff -r db25379644ae -r 19e84794d911 sys/fs/msdosfs/direntry.h
--- a/sys/fs/msdosfs/direntry.h Sat Jan 30 05:15:18 2016 +0000
+++ b/sys/fs/msdosfs/direntry.h Sat Jan 30 09:59:27 2016 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: direntry.h,v 1.9 2016/01/23 01:26:14 dholland Exp $ */
+/* $NetBSD: direntry.h,v 1.10 2016/01/30 09:59:27 mlelstv Exp $ */
/*-
* Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank.
@@ -136,11 +136,12 @@
int unix2dosfn(const unsigned char *un, unsigned char dn[12], int unlen,
unsigned int gen);
int unix2winfn(const unsigned char *un, int unlen, struct winentry *wep,
- int cnt, int chksum);
+ int cnt, int chksum, int utf8);
int winChkName(const unsigned char *un, int unlen, struct winentry *wep,
- int chksum);
-int win2unixfn(struct winentry *wep, struct dirent *dp, int chksum);
+ int chksum, int utf8);
+int win2unixfn(struct winentry *wep, struct dirent *dp, int chksum,
+ int utf8);
uint8_t winChksum(uint8_t *name);
-int winSlotCnt(const unsigned char *un, int unlen);
+int winSlotCnt(const unsigned char *un, int unlen, int utf8);
#endif /* _KERNEL || MAKEFS */
#endif /* _MSDOSFS_DIRENTRY_H_ */
diff -r db25379644ae -r 19e84794d911 sys/fs/msdosfs/msdosfs_conv.c
--- a/sys/fs/msdosfs/msdosfs_conv.c Sat Jan 30 05:15:18 2016 +0000
+++ b/sys/fs/msdosfs/msdosfs_conv.c Sat Jan 30 09:59:27 2016 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: msdosfs_conv.c,v 1.10 2014/09/01 09:09:47 martin Exp $ */
+/* $NetBSD: msdosfs_conv.c,v 1.11 2016/01/30 09:59:27 mlelstv Exp $ */
/*-
* Copyright (C) 1995, 1997 Wolfgang Solfrank.
@@ -45,6 +45,16 @@
* any damages caused by this software.
*
* October 1992
+ *
+ *
+ * Unicode 5.0 case folding taken from
+ *
+ * http://www.unicode.org/Public/5.0.0/ucd/CaseFolding.txt
+ *
+ * Unicode Character Database
+ * Copyright (c) 1991-2006 Unicode, Inc.
+ * For terms of use, see http://www.unicode.org/terms_of_use.html
+ * For documentation, see UCD.html
*/
#if HAVE_NBTOOL_CONFIG_H
@@ -52,13 +62,14 @@
#endif
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: msdosfs_conv.c,v 1.10 2014/09/01 09:09:47 martin Exp $");
+__KERNEL_RCSID(0, "$NetBSD: msdosfs_conv.c,v 1.11 2016/01/30 09:59:27 mlelstv Exp $");
/*
* System include files.
*/
#include <sys/param.h>
#include <sys/time.h>
+#include <sys/endian.h>
#ifdef _KERNEL
#include <sys/dirent.h>
#include <sys/systm.h>
@@ -78,6 +89,22 @@
#include <fs/msdosfs/direntry.h>
#include <fs/msdosfs/denode.h>
+static int invalidname(const u_int16_t *, int);
+
+static int ucs2utf8(const u_int16_t *, u_int8_t *, int);
+static int utf8ucs2(const u_int8_t *, int, u_int16_t *);
+
+static int ucs2utf8str(const u_int16_t *, int, u_int8_t *, int);
+static int utf8ucs2str(const u_int8_t *, int, u_int16_t *, int);
+static int ucs2char8str(const u_int16_t *, int, u_int8_t *, int);
+static int char8ucs2str(const u_int8_t *, int, u_int16_t *, int);
+
+static void ucs2pad(u_int16_t *, int, int);
+
+static u_int16_t ucs2fold(u_int16_t);
+static int ucs2match(u_int16_t *, u_int16_t *, int n);
+static int char8match(u_int16_t *, u_int16_t *, int n);
+
/*
* The number of seconds between Jan 1, 1970 and Jan 1, 1980. In that
* interval there were 8 regular years and 2 leap years.
@@ -284,6 +311,905 @@
0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, /* f8-ff */
};
+/* Unicode case folding for codes 0x0000..0xffff */
+static const u_int16_t
+foldmap[] = {
+ 0x0041, 0x0061, /* LATIN CAPITAL LETTER A */
+ 0x0042, 0x0062, /* LATIN CAPITAL LETTER B */
+ 0x0043, 0x0063, /* LATIN CAPITAL LETTER C */
+ 0x0044, 0x0064, /* LATIN CAPITAL LETTER D */
+ 0x0045, 0x0065, /* LATIN CAPITAL LETTER E */
+ 0x0046, 0x0066, /* LATIN CAPITAL LETTER F */
+ 0x0047, 0x0067, /* LATIN CAPITAL LETTER G */
+ 0x0048, 0x0068, /* LATIN CAPITAL LETTER H */
+ 0x0049, 0x0069, /* LATIN CAPITAL LETTER I */
+ 0x004A, 0x006A, /* LATIN CAPITAL LETTER J */
+ 0x004B, 0x006B, /* LATIN CAPITAL LETTER K */
+ 0x004C, 0x006C, /* LATIN CAPITAL LETTER L */
+ 0x004D, 0x006D, /* LATIN CAPITAL LETTER M */
+ 0x004E, 0x006E, /* LATIN CAPITAL LETTER N */
+ 0x004F, 0x006F, /* LATIN CAPITAL LETTER O */
+ 0x0050, 0x0070, /* LATIN CAPITAL LETTER P */
+ 0x0051, 0x0071, /* LATIN CAPITAL LETTER Q */
+ 0x0052, 0x0072, /* LATIN CAPITAL LETTER R */
+ 0x0053, 0x0073, /* LATIN CAPITAL LETTER S */
+ 0x0054, 0x0074, /* LATIN CAPITAL LETTER T */
+ 0x0055, 0x0075, /* LATIN CAPITAL LETTER U */
+ 0x0056, 0x0076, /* LATIN CAPITAL LETTER V */
+ 0x0057, 0x0077, /* LATIN CAPITAL LETTER W */
+ 0x0058, 0x0078, /* LATIN CAPITAL LETTER X */
+ 0x0059, 0x0079, /* LATIN CAPITAL LETTER Y */
+ 0x005A, 0x007A, /* LATIN CAPITAL LETTER Z */
+ 0x00B5, 0x03BC, /* MICRO SIGN */
+ 0x00C0, 0x00E0, /* LATIN CAPITAL LETTER A WITH GRAVE */
+ 0x00C1, 0x00E1, /* LATIN CAPITAL LETTER A WITH ACUTE */
+ 0x00C2, 0x00E2, /* LATIN CAPITAL LETTER A WITH CIRCUMFLEX */
+ 0x00C3, 0x00E3, /* LATIN CAPITAL LETTER A WITH TILDE */
+ 0x00C4, 0x00E4, /* LATIN CAPITAL LETTER A WITH DIAERESIS */
+ 0x00C5, 0x00E5, /* LATIN CAPITAL LETTER A WITH RING ABOVE */
+ 0x00C6, 0x00E6, /* LATIN CAPITAL LETTER AE */
+ 0x00C7, 0x00E7, /* LATIN CAPITAL LETTER C WITH CEDILLA */
+ 0x00C8, 0x00E8, /* LATIN CAPITAL LETTER E WITH GRAVE */
+ 0x00C9, 0x00E9, /* LATIN CAPITAL LETTER E WITH ACUTE */
+ 0x00CA, 0x00EA, /* LATIN CAPITAL LETTER E WITH CIRCUMFLEX */
+ 0x00CB, 0x00EB, /* LATIN CAPITAL LETTER E WITH DIAERESIS */
+ 0x00CC, 0x00EC, /* LATIN CAPITAL LETTER I WITH GRAVE */
+ 0x00CD, 0x00ED, /* LATIN CAPITAL LETTER I WITH ACUTE */
+ 0x00CE, 0x00EE, /* LATIN CAPITAL LETTER I WITH CIRCUMFLEX */
+ 0x00CF, 0x00EF, /* LATIN CAPITAL LETTER I WITH DIAERESIS */
+ 0x00D0, 0x00F0, /* LATIN CAPITAL LETTER ETH */
+ 0x00D1, 0x00F1, /* LATIN CAPITAL LETTER N WITH TILDE */
+ 0x00D2, 0x00F2, /* LATIN CAPITAL LETTER O WITH GRAVE */
+ 0x00D3, 0x00F3, /* LATIN CAPITAL LETTER O WITH ACUTE */
+ 0x00D4, 0x00F4, /* LATIN CAPITAL LETTER O WITH CIRCUMFLEX */
+ 0x00D5, 0x00F5, /* LATIN CAPITAL LETTER O WITH TILDE */
+ 0x00D6, 0x00F6, /* LATIN CAPITAL LETTER O WITH DIAERESIS */
+ 0x00D8, 0x00F8, /* LATIN CAPITAL LETTER O WITH STROKE */
+ 0x00D9, 0x00F9, /* LATIN CAPITAL LETTER U WITH GRAVE */
+ 0x00DA, 0x00FA, /* LATIN CAPITAL LETTER U WITH ACUTE */
+ 0x00DB, 0x00FB, /* LATIN CAPITAL LETTER U WITH CIRCUMFLEX */
+ 0x00DC, 0x00FC, /* LATIN CAPITAL LETTER U WITH DIAERESIS */
+ 0x00DD, 0x00FD, /* LATIN CAPITAL LETTER Y WITH ACUTE */
+ 0x00DE, 0x00FE, /* LATIN CAPITAL LETTER THORN */
+ 0x0100, 0x0101, /* LATIN CAPITAL LETTER A WITH MACRON */
+ 0x0102, 0x0103, /* LATIN CAPITAL LETTER A WITH BREVE */
+ 0x0104, 0x0105, /* LATIN CAPITAL LETTER A WITH OGONEK */
+ 0x0106, 0x0107, /* LATIN CAPITAL LETTER C WITH ACUTE */
+ 0x0108, 0x0109, /* LATIN CAPITAL LETTER C WITH CIRCUMFLEX */
+ 0x010A, 0x010B, /* LATIN CAPITAL LETTER C WITH DOT ABOVE */
+ 0x010C, 0x010D, /* LATIN CAPITAL LETTER C WITH CARON */
+ 0x010E, 0x010F, /* LATIN CAPITAL LETTER D WITH CARON */
+ 0x0110, 0x0111, /* LATIN CAPITAL LETTER D WITH STROKE */
+ 0x0112, 0x0113, /* LATIN CAPITAL LETTER E WITH MACRON */
+ 0x0114, 0x0115, /* LATIN CAPITAL LETTER E WITH BREVE */
+ 0x0116, 0x0117, /* LATIN CAPITAL LETTER E WITH DOT ABOVE */
+ 0x0118, 0x0119, /* LATIN CAPITAL LETTER E WITH OGONEK */
+ 0x011A, 0x011B, /* LATIN CAPITAL LETTER E WITH CARON */
+ 0x011C, 0x011D, /* LATIN CAPITAL LETTER G WITH CIRCUMFLEX */
+ 0x011E, 0x011F, /* LATIN CAPITAL LETTER G WITH BREVE */
+ 0x0120, 0x0121, /* LATIN CAPITAL LETTER G WITH DOT ABOVE */
+ 0x0122, 0x0123, /* LATIN CAPITAL LETTER G WITH CEDILLA */
+ 0x0124, 0x0125, /* LATIN CAPITAL LETTER H WITH CIRCUMFLEX */
+ 0x0126, 0x0127, /* LATIN CAPITAL LETTER H WITH STROKE */
+ 0x0128, 0x0129, /* LATIN CAPITAL LETTER I WITH TILDE */
+ 0x012A, 0x012B, /* LATIN CAPITAL LETTER I WITH MACRON */
+ 0x012C, 0x012D, /* LATIN CAPITAL LETTER I WITH BREVE */
+ 0x012E, 0x012F, /* LATIN CAPITAL LETTER I WITH OGONEK */
+ 0x0132, 0x0133, /* LATIN CAPITAL LIGATURE IJ */
+ 0x0134, 0x0135, /* LATIN CAPITAL LETTER J WITH CIRCUMFLEX */
+ 0x0136, 0x0137, /* LATIN CAPITAL LETTER K WITH CEDILLA */
+ 0x0139, 0x013A, /* LATIN CAPITAL LETTER L WITH ACUTE */
+ 0x013B, 0x013C, /* LATIN CAPITAL LETTER L WITH CEDILLA */
+ 0x013D, 0x013E, /* LATIN CAPITAL LETTER L WITH CARON */
+ 0x013F, 0x0140, /* LATIN CAPITAL LETTER L WITH MIDDLE DOT */
+ 0x0141, 0x0142, /* LATIN CAPITAL LETTER L WITH STROKE */
+ 0x0143, 0x0144, /* LATIN CAPITAL LETTER N WITH ACUTE */
+ 0x0145, 0x0146, /* LATIN CAPITAL LETTER N WITH CEDILLA */
+ 0x0147, 0x0148, /* LATIN CAPITAL LETTER N WITH CARON */
+ 0x014A, 0x014B, /* LATIN CAPITAL LETTER ENG */
+ 0x014C, 0x014D, /* LATIN CAPITAL LETTER O WITH MACRON */
+ 0x014E, 0x014F, /* LATIN CAPITAL LETTER O WITH BREVE */
+ 0x0150, 0x0151, /* LATIN CAPITAL LETTER O WITH DOUBLE ACUTE */
+ 0x0152, 0x0153, /* LATIN CAPITAL LIGATURE OE */
+ 0x0154, 0x0155, /* LATIN CAPITAL LETTER R WITH ACUTE */
+ 0x0156, 0x0157, /* LATIN CAPITAL LETTER R WITH CEDILLA */
+ 0x0158, 0x0159, /* LATIN CAPITAL LETTER R WITH CARON */
+ 0x015A, 0x015B, /* LATIN CAPITAL LETTER S WITH ACUTE */
+ 0x015C, 0x015D, /* LATIN CAPITAL LETTER S WITH CIRCUMFLEX */
+ 0x015E, 0x015F, /* LATIN CAPITAL LETTER S WITH CEDILLA */
+ 0x0160, 0x0161, /* LATIN CAPITAL LETTER S WITH CARON */
+ 0x0162, 0x0163, /* LATIN CAPITAL LETTER T WITH CEDILLA */
+ 0x0164, 0x0165, /* LATIN CAPITAL LETTER T WITH CARON */
+ 0x0166, 0x0167, /* LATIN CAPITAL LETTER T WITH STROKE */
+ 0x0168, 0x0169, /* LATIN CAPITAL LETTER U WITH TILDE */
+ 0x016A, 0x016B, /* LATIN CAPITAL LETTER U WITH MACRON */
+ 0x016C, 0x016D, /* LATIN CAPITAL LETTER U WITH BREVE */
+ 0x016E, 0x016F, /* LATIN CAPITAL LETTER U WITH RING ABOVE */
+ 0x0170, 0x0171, /* LATIN CAPITAL LETTER U WITH DOUBLE ACUTE */
+ 0x0172, 0x0173, /* LATIN CAPITAL LETTER U WITH OGONEK */
+ 0x0174, 0x0175, /* LATIN CAPITAL LETTER W WITH CIRCUMFLEX */
+ 0x0176, 0x0177, /* LATIN CAPITAL LETTER Y WITH CIRCUMFLEX */
+ 0x0178, 0x00FF, /* LATIN CAPITAL LETTER Y WITH DIAERESIS */
+ 0x0179, 0x017A, /* LATIN CAPITAL LETTER Z WITH ACUTE */
+ 0x017B, 0x017C, /* LATIN CAPITAL LETTER Z WITH DOT ABOVE */
+ 0x017D, 0x017E, /* LATIN CAPITAL LETTER Z WITH CARON */
+ 0x017F, 0x0073, /* LATIN SMALL LETTER LONG S */
+ 0x0181, 0x0253, /* LATIN CAPITAL LETTER B WITH HOOK */
+ 0x0182, 0x0183, /* LATIN CAPITAL LETTER B WITH TOPBAR */
+ 0x0184, 0x0185, /* LATIN CAPITAL LETTER TONE SIX */
+ 0x0186, 0x0254, /* LATIN CAPITAL LETTER OPEN O */
+ 0x0187, 0x0188, /* LATIN CAPITAL LETTER C WITH HOOK */
+ 0x0189, 0x0256, /* LATIN CAPITAL LETTER AFRICAN D */
+ 0x018A, 0x0257, /* LATIN CAPITAL LETTER D WITH HOOK */
Home |
Main Index |
Thread Index |
Old Index