Subject: kern/11547: various fixes in the lfs syscalls and lfs_cleanerd
To: None <gnats-bugs@gnats.netbsd.org>
From: None <joff@gci-net.com>
List: netbsd-bugs
Date: 11/21/2000 21:13:16
>Number: 11547
>Category: kern
>Synopsis: various fixes in the lfs syscalls and lfs_cleanerd
>Confidential: no
>Severity: serious
>Priority: high
>Responsible: kern-bug-people
>State: open
>Class: sw-bug
>Submitter-Id: net
>Arrival-Date: Tue Nov 21 21:13:00 PST 2000
>Closed-Date:
>Last-Modified:
>Originator: Jesse Off
>Release: 1.5BETA2
>Organization:
>Environment:
NetBSD construct.home 1.5_BETA NetBSD 1.5_BETA (CONSTRUCT) #30: Tue Nov 21 19:51:41 MST 2000 root@construct.home:/usr/src/sys/arch/i386/compile/CONSTRUCT i386
>Description:
The other day, I got a panic:
lfs_unmount: still dirty blocks on ifile vnode
on filesystem unmount time. Upon reboot, the number of segs clean and
lfs_avail was wrong. What I think happened was the cleaner made a call to
lfs_segclean during the unmounting, dirtying blocks on the ifile segment
usage table and incrementing lfs_avail.
This got me to looking at the lfs_syscalls.c and I noticed the other
system calls could do bad things like this too. I've included a patch
to wrap all the system calls with vfs_busy() calls to delay unmounts
until the syscall exits. Also, while I was at it, I fixed a few other
things I found fishy in lfs_syscalls.c:
*) Another lfs_avail leak when cleaning up from an error in lfs_markv
*) A potential missing VOP_UNLOCK while cleaning up after a error in markv
*) Change the return value for an outdated/bad/unmounted fsid_t from
EINVAL to ENOENT which is consistent with the way vfs_busy() works.
What do you think about this?
*) wrapping lfs_markv, lfs_bmapv, and lfs_segclean in vfs_(un)busy()
calls (already mentioned above)
Then, while testing these fixes, I found and fixed some other potential
problems in lfs_cleanerd:
*) We do a superfluous free() when we error out of add_segment(). The
calling code expects there to still be a valid (unfreed)
SEGS_AND_BLOCKS struct even if add_segment returns error. The
effect of this is that we can pass bad addresses to markv().
*) If our filesystem is unmounted in add_segment() and all of a sudden
lfs_bmapv() starts returning ENOENT, we get stuck in a loop where
every segment has to be read in until we get into a context where we
_do_ check to see if the filesystem is unmounted and exit. I've
fixed this with a check to errno for ENOENT after add_segment fails.
If it is ENOENT, we break out of the loop.
*) There is potential in clean_segments() for an infinite loop if a
single lfs_markv() call fails. This is because the exit condition
for the loop is only satisfied when a variable that is only
decremented on lfs_markv() successes reaches 0. Again, the fix
is trivial. I also included a similar ENOENT short circuit if
lfs_markv() errs because the filesystem has been unmounted.
Note that messages will still appear in syslog from lfs_cleanerd about
lfs_bmapv, markv, segclean, etc failing with "No such file or directory",
when the fs is unmounted in the middle of a cleaning pass, but these
can be safely ignored.
>How-To-Repeat:
Well, not all these problems were repeatable. But I did my
testing by filling up a filesystem as root with
dd if=/dev/zero of=out bs=64k (Surprising, this works now without a panic!!)
then, when dd finally returns ENOSPC and the cleaner starts grinding
away endlessly, I unmount. Each time I mount, the cleaner automatically
restarts in a bmapv/markv cycle that I can then interrupt with an unmount.
Repeat this many times, trying to uninterrupt in different phases in
the cleaning cycle.
>Fix:
Two patches are inclosed below, one for /libexec/lfs_cleanerd/cleanerd.c
and one for /sys/ufs/lfs/lfs_syscalls.c
RCS file: /cvsroot/basesrc/libexec/lfs_cleanerd/cleanerd.c,v
retrieving revision 1.25
diff -u -r1.25 cleanerd.c
--- cleanerd.c 2000/11/13 22:12:50 1.25
+++ cleanerd.c 2000/11/22 04:24:38
@@ -61,6 +61,7 @@
#include <time.h>
#include <unistd.h>
#include <util.h>
+#include <errno.h>
#include <syslog.h>
@@ -527,7 +528,7 @@
if (add_segment(fsp, sp, sbp) < 0) {
syslog(LOG_NOTICE,"add_segment failed"
" segment %ld: %m", sp->sl_id);
- if (sbp->nsegs == 0)
+ if (sbp->nsegs == 0 && errno != ENOENT)
continue;
else
break;
@@ -543,7 +544,7 @@
if (add_segment(fsp, sp, sbp) != 0) {
syslog(LOG_NOTICE,"add_segment failed"
" segment %ld: %m", sp->sl_id);
- if (sbp->nsegs == 0)
+ if (sbp->nsegs == 0 && errno != ENOENT)
continue;
else
break;
@@ -846,8 +847,6 @@
--sbp->nsegs;
if (tba)
free(tba);
- if (sbp->ba)
- free(sbp->ba);
if (error) {
sp->su_flags |= SEGUSE_ERROR;
++cleaner_stats.segs_error;
@@ -886,10 +885,9 @@
if ((error = lfs_markv(&fsp->fi_statfsp->f_fsid,
bp, clean_blocks)) < 0) {
syslog(LOG_WARNING,"clean_segment: lfs_markv failed: %m");
- ++cleaner_stats.segs_error;
+ if (errno == ENOENT) break;
}
- else
- sbp->nb -= clean_blocks;
+ sbp->nb -= clean_blocks;
}
/* Clean up */
Index: lfs_syscalls.c
===================================================================
RCS file: /cvsroot/syssrc/sys/ufs/lfs/lfs_syscalls.c,v
retrieving revision 1.41.4.5
diff -u -r1.41.4.5 lfs_syscalls.c
--- lfs_syscalls.c 2000/11/01 03:53:38 1.41.4.5
+++ lfs_syscalls.c 2000/11/22 04:21:53
@@ -175,12 +175,13 @@
return (error);
if ((mntp = vfs_getvfs(&fsid)) == NULL)
- return (EINVAL);
+ return (ENOENT);
fs = VFSTOUFS(mntp)->um_lfs;
if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
return (error);
+
maxino = (dbtofsb(fs, VTOI(fs->lfs_ivnode)->i_ffs_blocks) -
fs->lfs_cleansz - fs->lfs_segtabsz) * fs->lfs_ifpb;
@@ -191,6 +192,9 @@
if (error)
goto err1;
+ if ((error = vfs_busy(mntp, LK_NOWAIT, NULL)) != 0)
+ return (error);
+
/*
* This seglock is just to prevent the fact that we might have to sleep
* from allowing the possibility that our blocks might become
@@ -472,6 +476,7 @@
}
#endif
+ vfs_unbusy(mntp);
if(error)
return (error);
else if(do_again)
@@ -481,7 +486,13 @@
err2:
printf("lfs_markv err2\n");
+ if(lfs_fastvget_unlock) {
+ VOP_UNLOCK(vp, 0);
+ --numlocked;
+ }
lfs_vunref(vp);
+ --numrefed;
+
/* Free up fakebuffers -- have to take these from the LOCKED list */
again:
s = splbio();
@@ -494,6 +505,8 @@
splx(s);
goto again;
}
+ if(bp->b_flags & B_DELWRI)
+ fs->lfs_avail += btodb(bp->b_bcount);
bremfree(bp);
splx(s);
brelse(bp);
@@ -504,6 +517,11 @@
free(start, M_SEGMENT);
lfs_segunlock(fs);
vfs_unbusy(mntp);
+#ifdef DEBUG_LFS
+ if(numlocked != 0 || numrefed != 0) {
+ panic("lfs_markv: numlocked=%d numrefed=%d", numlocked, numrefed);
+ }
+#endif
return (error);
err1:
@@ -558,15 +576,18 @@
if ((error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t))) != 0)
return (error);
if ((mntp = vfs_getvfs(&fsid)) == NULL)
- return (EINVAL);
+ return (ENOENT);
ump = VFSTOUFS(mntp);
+ if ((error = vfs_busy(mntp, LK_NOWAIT, NULL)) != 0)
+ return (error);
origcnt = cnt = SCARG(uap, blkcnt);
start = malloc(cnt * sizeof(BLOCK_INFO), M_SEGMENT, M_WAITOK);
error = copyin(SCARG(uap, blkiov), start, cnt * sizeof(BLOCK_INFO));
if (error) {
free(start, M_SEGMENT);
+ vfs_unbusy(mntp);
return (error);
}
@@ -584,6 +605,7 @@
printf("lfs_bmapv: attempt to clean current segment? (#%d)\n",
datosn(fs, fs->lfs_curseg));
free(start,M_SEGMENT);
+ vfs_unbusy(mntp);
return (EBUSY);
}
#endif /* DEBUG */
@@ -600,6 +622,7 @@
printf("lfs_bmapv: attempt to clean pending segment? (#%d)\n",
datosn(fs, fs->lfs_pending[j]));
free(start,M_SEGMENT);
+ vfs_unbusy(mntp);
return (EBUSY);
}
}
@@ -736,6 +759,7 @@
copyout(start, SCARG(uap, blkiov), origcnt * sizeof(BLOCK_INFO));
free(start, M_SEGMENT);
+ vfs_unbusy(mntp);
return 0;
}
@@ -772,16 +796,19 @@
if ((error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t))) != 0)
return (error);
if ((mntp = vfs_getvfs(&fsid)) == NULL)
- return (EINVAL);
+ return (ENOENT);
fs = VFSTOUFS(mntp)->um_lfs;
if (datosn(fs, fs->lfs_curseg) == SCARG(uap, segment))
return (EBUSY);
+ if ((error = vfs_busy(mntp, LK_NOWAIT, NULL)) != 0)
+ return (error);
LFS_SEGENTRY(sup, fs, SCARG(uap, segment), bp);
if (sup->su_flags & SEGUSE_ACTIVE) {
brelse(bp);
+ vfs_unbusy(mntp);
return (EBUSY);
}
@@ -805,6 +832,7 @@
cip->avail = fs->lfs_avail - fs->lfs_ravail;
(void) VOP_BWRITE(bp);
wakeup(&fs->lfs_avail);
+ vfs_unbusy(mntp);
return (0);
}
>Release-Note:
>Audit-Trail:
>Unformatted: