Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src Fix assorted 64->32 truncations related to BLOCK_INFO.



details:   https://anonhg.NetBSD.org/src/rev/89082106abfa
branches:  trunk
changeset: 339822:89082106abfa
user:      dholland <dholland%NetBSD.org@localhost>
date:      Wed Aug 12 18:23:16 2015 +0000

description:
Fix assorted 64->32 truncations related to BLOCK_INFO.

Also make note of a cleaner limitation: it seems that when it goes to
coalesce discontiguous files, it mallocs an array with one BLOCK_INFO
for every block in the file. Therefore, with 64-bit LFS, on a 32-bit
platform it will be possible to have files large enough to overflow
the cleaner's address space. Currently these will be skipped and cause
warnings via syslog.

At some point someone should rewrite the logic to coalesce files to
use chunks of some reasonable size, as discontinuity between such
chunks is immaterial and mallocing this much space is silly and
fragile. Also, the kernel only accepts up to 65536 blocks at a time
for bmapv and markv, so processing more than this at once probably
isn't useful and may not even work currently. I don't want to change
this around just now as it's not entirely trivial.

diffstat:

 libexec/lfs_cleanerd/cleaner.h      |   3 +-
 libexec/lfs_cleanerd/coalesce.c     |  85 ++++++++++++++++++++++++++++--------
 libexec/lfs_cleanerd/lfs_cleanerd.c |  35 +++++++++++---
 sys/ufs/lfs/lfs.h                   |   4 +-
 sys/ufs/lfs/lfs_extern.h            |   3 +-
 sys/ufs/lfs/lfs_syscalls.c          |  20 ++++----
 6 files changed, 106 insertions(+), 44 deletions(-)

diffs (truncated from 412 to 300 lines):

diff -r e30560ae39f8 -r 89082106abfa libexec/lfs_cleanerd/cleaner.h
--- a/libexec/lfs_cleanerd/cleaner.h    Wed Aug 12 18:22:22 2015 +0000
+++ b/libexec/lfs_cleanerd/cleaner.h    Wed Aug 12 18:23:16 2015 +0000
@@ -69,14 +69,13 @@
 int needs_cleaning(struct clfs *, CLEANERINFO *);
 int reinit_fs(struct clfs *);
 void reload_ifile(struct clfs *);
-void toss_old_blocks(struct clfs *, BLOCK_INFO **, int *, int *);
+void toss_old_blocks(struct clfs *, BLOCK_INFO **, blkcnt_t *, int *);
 
 /* cleansrv.c */
 void check_control_socket(void);
 void try_to_become_master(int, char **);
 
 /* coalesce.c */
-int log2int(int);
 int clean_all_inodes(struct clfs *);
 int fork_coalesce(struct clfs *);
 
diff -r e30560ae39f8 -r 89082106abfa libexec/lfs_cleanerd/coalesce.c
--- a/libexec/lfs_cleanerd/coalesce.c   Wed Aug 12 18:22:22 2015 +0000
+++ b/libexec/lfs_cleanerd/coalesce.c   Wed Aug 12 18:23:16 2015 +0000
@@ -1,4 +1,4 @@
-/*      $NetBSD: coalesce.c,v 1.28 2015/07/28 05:14:23 dholland Exp $  */
+/*      $NetBSD: coalesce.c,v 1.29 2015/08/12 18:23:16 dholland Exp $  */
 
 /*-
  * Copyright (c) 2002, 2005 The NetBSD Foundation, Inc.
@@ -60,7 +60,12 @@
 
 extern int debug, do_mmap;
 
-int log2int(int n)
+/*
+ * XXX return the arg to just int when/if we don't need it for
+ * potentially huge block counts any more.
+ */
+static int
+log2int(intmax_t n)
 {
        int log;
 
@@ -151,8 +156,8 @@
                int blkcnt;
        } */ lim;
        daddr_t toff;
-       int i;
-       int nb, onb, noff;
+       int noff;
+       blkcnt_t i, nb, onb;
        int retval;
        int bps;
 
@@ -178,16 +183,36 @@
        }
 #endif
        if (nb > dip->di_blocks) {
-               dlog("ino %d, computed blocks %d > held blocks %d", ino, nb,
-                    dip->di_blocks);
+               dlog("ino %ju, computed blocks %jd > held blocks %ju",
+                    (uintmax_t)ino, (intmax_t)nb,
+                    (uintmax_t)dip->di_blocks);
                free(dip);
                return COALESCE_BADBLOCKSIZE;
        }
 
+       /*
+        * XXX: We should really coalesce really large files in
+        * chunks, as there's substantial diminishing returns and
+        * mallocing huge amounts of memory just to get those returns
+        * is pretty silly. But that requires a big rework of this
+        * code. (On the plus side though then we can stop worrying
+        * about block counts > 2^31.)
+        */
+
+       /* ugh, no DADDR_T_MAX */
+       __CTASSERT(sizeof(daddr_t) == sizeof(int64_t));
+       if (nb > INT64_MAX / sizeof(BLOCK_INFO)) {
+               syslog(LOG_WARNING, "ino %ju, %jd blocks: array too large\n",
+                      (uintmax_t)ino, (uintmax_t)nb);
+               free(dip);
+               return COALESCE_NOMEM;
+       }
+
        bip = (BLOCK_INFO *)malloc(sizeof(BLOCK_INFO) * nb);
        if (bip == NULL) {
-               syslog(LOG_WARNING, "ino %llu, %d blocks: %m",
-                   (unsigned long long)ino, nb);
+               syslog(LOG_WARNING, "ino %llu, %jd blocks: %s\n",
+                   (unsigned long long)ino, (intmax_t)nb,
+                   strerror(errno));
                free(dip);
                return COALESCE_NOMEM;
        }
@@ -198,6 +223,18 @@
                bip[i].bi_version = dip->di_gen;
                /* Don't set the size, but let lfs_bmap fill it in */
        }
+       /*
+        * The kernel also contains this check; but as lim.blkcnt is
+        * only 32 bits wide, we need to check ourselves too in case
+        * we'd otherwise truncate a value > 2^31, as that might
+        * succeed and create bizarre results.
+        */
+       if (nb > LFS_MARKV_MAXBLKCNT) {
+               syslog(LOG_WARNING, "%s: coalesce: LFCNBMAPV: Too large\n",
+                      lfs_sb_getfsmnt(fs));
+               retval = COALESCE_BADBMAPV;
+               goto out;
+       }
        lim.blkiov = bip;
        lim.blkcnt = nb;
        if (kops.ko_fcntl(fs->clfs_ifilefd, LFCNBMAPV, &lim) < 0) { 
@@ -208,13 +245,15 @@
        }
 #if 0
        for (i = 0; i < nb; i++) {
-               printf("bi_size = %d, bi_ino = %d, "
-                   "bi_lbn = %d, bi_daddr = %d\n",
-                   bip[i].bi_size, bip[i].bi_inode, bip[i].bi_lbn,
-                   bip[i].bi_daddr);
+               printf("bi_size = %d, bi_ino = %ju, "
+                   "bi_lbn = %jd, bi_daddr = %jd\n",
+                   bip[i].bi_size, (uintmax_t)bip[i].bi_inode,
+                   (intmax_t)bip[i].bi_lbn,
+                   (intmax_t)bip[i].bi_daddr);
        }
 #endif
-       noff = toff = 0;
+       noff = 0;
+       toff = 0;
        for (i = 1; i < nb; i++) {
                if (bip[i].bi_daddr != bip[i - 1].bi_daddr + lfs_sb_getfrag(fs))
                        ++noff;
@@ -235,8 +274,8 @@
                goto out;
        } else if (debug)
                syslog(LOG_DEBUG, "ino %llu total discontinuity "
-                   "%d (%lld) for %d blocks", (unsigned long long)ino,
-                   noff, (long long)toff, nb);
+                   "%d (%jd) for %jd blocks", (unsigned long long)ino,
+                   noff, (intmax_t)toff, (intmax_t)nb);
 
        /* Search for blocks in active segments; don't move them. */
        for (i = 0; i < nb; i++) {
@@ -274,8 +313,8 @@
        for (i = 0; i < nb; i++) {
                bip[i].bi_bp = malloc(bip[i].bi_size);
                if (bip[i].bi_bp == NULL) {
-                       syslog(LOG_WARNING, "allocate block buffer size=%d: %m",
-                           bip[i].bi_size);
+                       syslog(LOG_WARNING, "allocate block buffer size=%d: %s\n",
+                           bip[i].bi_size, strerror(errno));
                        retval = COALESCE_NOMEM;
                        goto out;
                }
@@ -287,13 +326,16 @@
                }
        }
        if (debug)
-               syslog(LOG_DEBUG, "ino %llu markv %d blocks",
-                   (unsigned long long)ino, nb);
+               syslog(LOG_DEBUG, "ino %ju markv %jd blocks",
+                   (uintmax_t)ino, (intmax_t)nb);
 
        /*
         * Write in segment-sized chunks.  If at any point we'd write more
         * than half of the available segments, sleep until that's not
         * true any more.
+        *
+        * XXX the pointer arithmetic in this loop is illegal; replace
+        * TBIP with an integer (blkcnt_t) offset.
         */
        bps = lfs_segtod(fs, 1);
        for (tbip = bip; tbip < bip + nb; tbip += bps) {
@@ -307,6 +349,11 @@
                                    LFCNSEGWAIT, NULL);
                } while(cip.clean < 4);
 
+               /*
+                * Note that although lim.blkcnt is 32 bits wide, bps
+                * (which is blocks-per-segment) is < 2^32 so the
+                * value assigned here is always in range.
+                */
                lim.blkiov = tbip;
                lim.blkcnt = (tbip + bps < bip + nb ? bps : nb % bps);
                if (kops.ko_fcntl(fs->clfs_ifilefd, LFCNMARKV, &lim) < 0) {
diff -r e30560ae39f8 -r 89082106abfa libexec/lfs_cleanerd/lfs_cleanerd.c
--- a/libexec/lfs_cleanerd/lfs_cleanerd.c       Wed Aug 12 18:22:22 2015 +0000
+++ b/libexec/lfs_cleanerd/lfs_cleanerd.c       Wed Aug 12 18:23:16 2015 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: lfs_cleanerd.c,v 1.44 2015/08/02 18:18:09 dholland Exp $    */
+/* $NetBSD: lfs_cleanerd.c,v 1.45 2015/08/12 18:23:16 dholland Exp $    */
 
 /*-
  * Copyright (c) 2005 The NetBSD Foundation, Inc.
@@ -791,7 +791,7 @@
                return -1;
        if (b->bi_lbn == LFS_UNUSED_LBN)
                return 1;
-       if ((u_int32_t)a->bi_lbn > (u_int32_t)b->bi_lbn)
+       if ((u_int64_t)a->bi_lbn > (u_int64_t)b->bi_lbn)
                return 1;
        else
                return -1;
@@ -813,9 +813,10 @@
 }
 
 void
-toss_old_blocks(struct clfs *fs, BLOCK_INFO **bipp, int *bic, int *sizep)
+toss_old_blocks(struct clfs *fs, BLOCK_INFO **bipp, blkcnt_t *bic, int *sizep)
 {
-       int i, r;
+       blkcnt_t i;
+       int r;
        BLOCK_INFO *bip = *bipp;
        struct lfs_fcntl_markv /* {
                BLOCK_INFO *blkiov;
@@ -832,6 +833,13 @@
        for (i = 0; i < *bic; i++)
                bip[i].bi_segcreate = bip[i].bi_daddr;
 
+       /*
+        * XXX: blkcnt_t is 64 bits, so *bic might overflow size_t
+        * (the argument type of heapsort's number argument) on a
+        * 32-bit platform. However, if so we won't have got this far
+        * because we'll have failed trying to allocate the array. So
+        * while *bic here might cause a 64->32 truncation, it's safe.
+        */
        /* Sort the blocks */
        heapsort(bip, *bic, sizeof(BLOCK_INFO), bi_comparator);
 
@@ -870,6 +878,7 @@
 {
        BLOCK_INFO *bip;
        int i, r, bic;
+       blkcnt_t widebic;
        off_t nb;
        double util;
        struct lfs_fcntl_markv /* {
@@ -884,7 +893,9 @@
        fs->clfs_nactive = 0;
        if (load_segment(fs, sn, &bip, &bic) <= 0)
                return -1;
-       toss_old_blocks(fs, &bip, &bic, NULL);
+       widebic = bic;
+       toss_old_blocks(fs, &bip, &widebic, NULL);
+       bic = widebic;
 
        /* Record statistics */
        for (i = nb = 0; i < bic; i++)
@@ -925,7 +936,7 @@
  * if the block needs to be added, 0 if it is already represented.
  */
 static int
-check_or_add(ino_t ino, int32_t lbn, BLOCK_INFO *bip, int bic, BLOCK_INFO **ebipp, int *ebicp)
+check_or_add(ino_t ino, daddr_t lbn, BLOCK_INFO *bip, int bic, BLOCK_INFO **ebipp, int *ebicp)
 {
        BLOCK_INFO *t, *ebip = *ebipp;
        int ebic = *ebicp;
@@ -975,7 +986,7 @@
        int num;
        int i, j, ebic;
        BLOCK_INFO *ebip;
-       int32_t lbn;
+       daddr_t lbn;
 
        start = 0;
        ebip = NULL;
@@ -998,6 +1009,7 @@
                if (bip[i].bi_lbn < ULFS_NDADDR)
                        continue;
 
+               /* XXX the struct lfs cast is completely wrong/unsafe */
                ulfs_getlbns((struct lfs *)fs, NULL, (daddr_t)bip[i].bi_lbn, in, &num);
                for (j = 0; j < num; j++) {
                        check_or_add(bip[i].bi_inode, in[j].in_lbn,
@@ -1016,6 +1028,7 @@
 clean_fs(struct clfs *fs, CLEANERINFO *cip)
 {
        int i, j, ngood, sn, bic, r, npos;
+       blkcnt_t widebic;
        int bytes, totbytes;
        struct ubuf *bp;
        SEGUSE *sup;
@@ -1096,7 +1109,9 @@
                             fs->clfs_segtabp[i]->nbytes);
                        if ((r = load_segment(fs, sn, &bip, &bic)) > 0) {
                                ++ngood;
-                               toss_old_blocks(fs, &bip, &bic, &bytes);
+                               widebic = bic;
+                               toss_old_blocks(fs, &bip, &widebic, &bytes);
+                               bic = widebic;
                                totbytes += bytes;
                        } else if (r == 0)
                                fd_release(fs->clfs_devvp);
@@ -1124,7 +1139,9 @@
                        else
                                break;
                }



Home | Main Index | Thread Index | Old Index