Subject: split LFS vnode locks
To: None <perseant@netbsd.org>
From: YAMAMOTO Takashi <yamt@mwd.biglobe.ne.jp>
List: tech-kern
Date: 12/15/2002 22:46:25
--NextPart-20021215224130-0409300
Content-Type: Text/Plain; charset=us-ascii
hi.
currently, since lfs_markv has to acquire vnode locks,
lfs_reserve has to release vnode locks before waiting for lfs_avail.
however, releasing locks in lfs_reserve isn't safe because
we can't keep the vnodes/inodes from being modified without holding the locks.
i'd like to separate the metadata (i.e. block pointer) lock from the vnode lock
so that lfs_reserve no longer needs to release vnode locks, as in the attached patch.
is it ok?
thanks.
YAMAMOTO Takashi
--NextPart-20021215224130-0409300
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="lfs.metalock.diff"
Index: ufs/inode.h
===================================================================
RCS file: /cvs/NetBSD/syssrc/sys/ufs/ufs/inode.h,v
retrieving revision 1.31
diff -u -p -r1.31 inode.h
--- ufs/inode.h 2002/12/01 00:12:12 1.31
+++ ufs/inode.h 2002/12/15 13:25:30
@@ -62,6 +62,9 @@ struct lfs_inode_ext {
off_t lfs_osize; /* size of file on disk */
u_int32_t lfs_effnblocks; /* number of blocks when i/o completes */
size_t lfs_fragsize[NDADDR]; /* size of on-disk direct blocks */
+ int lfs_writecount;
+ int lfs_readcount;
+ int lfs_writerpid;
};
/*
@@ -118,6 +121,9 @@ struct inode {
#define i_lfs_effnblks inode_ext.lfs.lfs_effnblocks
#define i_lfs_fragsize inode_ext.lfs.lfs_fragsize
#define i_lfs_osize inode_ext.lfs.lfs_osize
+#define i_lfs_writecount inode_ext.lfs.lfs_writecount
+#define i_lfs_readcount inode_ext.lfs.lfs_readcount
+#define i_lfs_writerpid inode_ext.lfs.lfs_writerpid
/*
* The on-disk dinode itself.
*/
Index: ufs/ufs_readwrite.c
===================================================================
RCS file: /cvs/NetBSD/syssrc/sys/ufs/ufs/ufs_readwrite.c,v
retrieving revision 1.44
diff -u -p -r1.44 ufs_readwrite.c
--- ufs/ufs_readwrite.c 2002/10/23 09:15:08 1.44
+++ ufs/ufs_readwrite.c 2002/12/15 13:25:30
@@ -415,6 +415,12 @@ WRITE(void *v)
else
flags &= ~B_CLRBUF;
+#ifdef LFS_READWRITE
+ error = lfs_reserve(fs, vp,
+ btofsb(fs, (NIADDR + 1) << fs->lfs_bshift));
+ if (error)
+ break;
+#endif
error = VOP_BALLOC(vp, uio->uio_offset, xfersize,
ap->a_cred, flags, &bp);
@@ -442,11 +448,10 @@ WRITE(void *v)
break;
}
#ifdef LFS_READWRITE
- if (!error)
- error = lfs_reserve(fs, vp, btofsb(fs, (NIADDR + 1) << fs->lfs_bshift));
(void)VOP_BWRITE(bp);
if (!error)
- lfs_reserve(fs, vp, -btofsb(fs, (NIADDR + 1) << fs->lfs_bshift));
+ lfs_reserve(fs, vp,
+ -btofsb(fs, (NIADDR + 1) << fs->lfs_bshift));
#else
if (ioflag & IO_SYNC)
(void)bwrite(bp);
Index: lfs/lfs.h
===================================================================
RCS file: /cvs/NetBSD/syssrc/sys/ufs/lfs/lfs.h,v
retrieving revision 1.42
diff -u -p -r1.42 lfs.h
--- lfs/lfs.h 2002/12/01 00:12:10 1.42
+++ lfs/lfs.h 2002/12/15 13:25:30
@@ -111,6 +111,35 @@
#define LFS_WAIT_BYTES (((bufpages >> 1) - (bufpages >> 3) - 10) * NBPG)
#define LFS_BUFWAIT 2
+/*
+ * lfs inode meta data lock.
+ *
+ * this protects direct/indirect block pointers.
+ */
+#define LFS_INITLOCKMETA(vp) lfs_initmetalock(vp)
+/* are we holding exclusive lock? */
+#define _LFS_OWN_LOCK_META(vp) \
+ (VTOI(vp)->i_lfs_writerpid == curproc->p_pid)
+#define LFS_OWN_LOCK_META(vp) \
+ (VTOI(vp)->i_lfs_writecount != 0 && _LFS_OWN_LOCK_META(vp))
+
+/* acquire/release write lock */
+#define LFS_LOCK_META(vp) lfs_lockmeta(vp)
+#define LFS_UNLOCK_META(vp) lfs_unlockmeta(vp)
+
+/* acquire/release read lock */
+#define LFS_READ_LOCK_META(vp) lfs_readlockmeta(vp)
+#define LFS_READ_UNLOCK_META(vp) lfs_readunlockmeta(vp)
+
+/* assert we're (not) holding read or write lock */
+#define LFS_ASSERT_LOCK_META(vp) \
+ KASSERT(VTOI(vp)->i_lfs_readcount != 0 || LFS_OWN_LOCK_META(vp))
+#define LFS_ASSERT_UNLOCK_META(vp) \
+ KASSERT(!LFS_OWN_LOCK_META(vp))
+
+/* macros for passing locks to another thread */
+#define LFS_PASS_METALOCK_TO_AIODONED(vp) /* nothing */
+
#define LFS_LOCK_BUF(bp) do { \
if (((bp)->b_flags & (B_LOCKED | B_CALL)) == 0) { \
++locked_queue_count; \
Index: lfs/lfs_alloc.c
===================================================================
RCS file: /cvs/NetBSD/syssrc/sys/ufs/lfs/lfs_alloc.c,v
retrieving revision 1.57
diff -u -p -r1.57 lfs_alloc.c
--- lfs/lfs_alloc.c 2002/11/24 08:43:26 1.57
+++ lfs/lfs_alloc.c 2002/12/15 13:25:30
@@ -490,6 +490,8 @@ lfs_vcreate(struct mount *mp, ino_t ino,
/* Why was IN_MODIFIED ever set here? */
/* LFS_SET_UINO(ip, IN_CHANGE | IN_MODIFIED); */
+ LFS_INITLOCKMETA(vp);
+
#ifdef DEBUG_LFS_VNLOCK
if (ino == LFS_IFILE_INUM)
vp->v_vnlock->lk_wmesg = "inlock";
Index: lfs/lfs_balloc.c
===================================================================
RCS file: /cvs/NetBSD/syssrc/sys/ufs/lfs/lfs_balloc.c,v
retrieving revision 1.34
diff -u -p -r1.34 lfs_balloc.c
--- lfs/lfs_balloc.c 2002/12/11 13:34:14 1.34
+++ lfs/lfs_balloc.c 2002/12/15 13:25:30
@@ -134,6 +134,7 @@ lfs_balloc(void *v)
ufs_daddr_t lbn, lastblock;
int bb, bcount;
int error, frags, i, nsize, osize, num;
+ int needunlockmeta = 0;
vp = ap->a_vp;
ip = VTOI(vp);
@@ -169,7 +170,7 @@ lfs_balloc(void *v)
if ((error = lfs_fragextend(vp, osize, fs->lfs_bsize,
lastblock, &bp,
ap->a_cred)))
- return (error);
+ goto out;
ip->i_ffs_size = (lastblock + 1) * fs->lfs_bsize;
uvm_vnp_setsize(vp, ip->i_ffs_size);
ip->i_flag |= IN_CHANGE | IN_UPDATE;
@@ -202,22 +203,25 @@ lfs_balloc(void *v)
if (nsize <= osize) {
/* No need to extend */
if ((error = bread(vp, lbn, osize, NOCRED, &bp)))
- return error;
+ goto out;
} else {
/* Extend existing block */
if ((error =
lfs_fragextend(vp, osize, nsize, lbn, &bp,
ap->a_cred)))
- return error;
+ goto out;
}
*ap->a_bpp = bp;
}
- return 0;
+ error = 0;
+ goto out;
}
+ LFS_LOCK_META(vp);
+ needunlockmeta = 1;
error = ufs_bmaparray(vp, lbn, &daddr, &indirs[0], &num, NULL );
if (error)
- return (error);
+ goto out;
/*
* Do byte accounting all at once, so we can gracefully fail *before*
* we start assigning blocks.
@@ -236,7 +240,8 @@ lfs_balloc(void *v)
ip->i_lfs->lfs_bfree -= bcount;
ip->i_lfs_effnblks += bcount;
} else {
- return ENOSPC;
+ error = ENOSPC;
+ goto out;
}
if (daddr == UNASSIGNED) {
@@ -270,7 +275,7 @@ lfs_balloc(void *v)
UNWRITTEN;
idaddr = ((daddr_t *)ibp->b_data)[indirs[i].in_off];
if ((error = VOP_BWRITE(ibp))) {
- return error;
+ goto out;
}
}
}
@@ -328,11 +333,16 @@ lfs_balloc(void *v)
bp->b_blkno = daddr;
bp->b_flags |= B_READ;
VOP_STRATEGY(bp);
- return (biowait(bp));
+ error = biowait(bp);
+ goto out;
}
}
- return (0);
+ error = 0;
+out:
+ if (needunlockmeta)
+ LFS_UNLOCK_META(vp);
+ return (error);
}
/* VOP_BWRITE 1 time */
Index: lfs/lfs_bio.c
===================================================================
RCS file: /cvs/NetBSD/syssrc/sys/ufs/lfs/lfs_bio.c,v
retrieving revision 1.47
diff -u -p -r1.47 lfs_bio.c
--- lfs/lfs_bio.c 2002/11/27 11:36:40 1.47
+++ lfs/lfs_bio.c 2002/12/15 13:25:30
@@ -110,20 +110,10 @@ extern int lfs_dostats;
/*
* Try to reserve some blocks, prior to performing a sensitive operation that
- * requires the vnode lock to be honored. If there is not enough space, give
- * up the vnode lock temporarily and wait for the space to become available.
+ * requires the vnode lock to be honored. If there is not enough space, wait
+ * for the space to become available.
*
* Called with vp locked. (Note nowever that if fsb < 0, vp is ignored.)
- *
- * XXX YAMT - it isn't safe to unlock vp here
- * because the node might be modified while we sleep.
- * (eg. cached states like i_offset might be stale,
- * the vnode might be truncated, etc..)
- * maybe we should have a way to restart the vnode op. (EVOPRESTART?)
- *
- * XXX YAMT - we unlock the vnode so that cleaner can lock it.
- * but it isn't enough. eg. for VOP_REMOVE, we should unlock the vnode that
- * is going to be removed as well.
*/
int
lfs_reserve(struct lfs *fs, struct vnode *vp, int fsb)
@@ -132,10 +122,11 @@ lfs_reserve(struct lfs *fs, struct vnode
struct buf *bp;
int error, slept;
+ LFS_ASSERT_UNLOCK_META(vp);
+
slept = 0;
while (fsb > 0 && !lfs_fits(fs, fsb + fs->lfs_ravail) &&
vp != fs->lfs_unlockvp) {
- VOP_UNLOCK(vp, 0);
if (!slept) {
#ifdef DEBUG
@@ -155,7 +146,6 @@ lfs_reserve(struct lfs *fs, struct vnode
error = tsleep(&fs->lfs_avail, PCATCH | PUSER, "lfs_reserve",
0);
- vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX use lockstatus */
if (error)
return error;
}
@@ -261,6 +251,8 @@ lfs_bwrite_ext(struct buf *bp, int flags
struct inode *ip;
int fsb, error, s;
+ KASSERT(bp->b_flags & B_BUSY);
+ KASSERT(flags & BW_CLEAN || !(bp->b_flags & B_CALL));
/*
* Don't write *any* blocks if we're mounted read-only.
* In particular the cleaner can't write blocks either.
@@ -300,7 +292,7 @@ lfs_bwrite_ext(struct buf *bp, int flags
}
ip = VTOI(bp->b_vp);
- if (bp->b_flags & B_CALL) {
+ if (flags & BW_CLEAN) {
LFS_SET_UINO(ip, IN_CLEANING);
} else {
LFS_SET_UINO(ip, IN_MODIFIED);
@@ -405,6 +397,8 @@ lfs_check(struct vnode *vp, ufs_daddr_t
struct inode *ip;
extern int lfs_dirvcount;
+ LFS_ASSERT_UNLOCK_META(vp);
+
error = 0;
ip = VTOI(vp);
@@ -591,4 +585,94 @@ lfs_countlocked(int *count, long *bytes,
*count = n;
*bytes = size;
return;
+}
+
+/*
+ * XXX we don't use lockmgr locks here because:
+ * - we want to acquire readlock recursively. (lfs_strategy)
+ * but doing so isn't safe with lockmgr LK_SHARED lock.
+ * - we want to unlock readlock in interrupt context. (lfs_strategy_iodone)
+ *
+ * XXX should have interlocks.
+ */
+void
+lfs_lockmeta(struct vnode *vp)
+{
+ struct inode *ip = VTOI(vp);
+ int s = splbio();
+
+ KASSERT(curproc != NULL);
+
+ while (ip->i_lfs_readcount ||
+ (ip->i_lfs_writecount && !_LFS_OWN_LOCK_META(vp))) {
+ ltsleep(&ip->i_lfs_writecount, PINOD, "lfsmetawrite", 0, 0);
+ }
+ KASSERT(ip->i_lfs_writerpid == NO_PID ||
+ ip->i_lfs_writerpid == curproc->p_pid);
+ ip->i_lfs_writecount++;
+ ip->i_lfs_writerpid = curproc->p_pid;
+
+ splx(s);
+}
+
+void
+lfs_unlockmeta(struct vnode *vp)
+{
+ struct inode *ip = VTOI(vp);
+ int s = splbio();
+
+ KASSERT(curproc != NULL);
+ KASSERT(ip->i_lfs_readcount == 0);
+ KASSERT(ip->i_lfs_writecount > 0);
+ KASSERT(ip->i_lfs_writerpid == curproc->p_pid);
+
+ if ((--ip->i_lfs_writecount) == 0) {
+ ip->i_lfs_writerpid = NO_PID;
+ wakeup(&ip->i_lfs_writecount);
+ }
+
+ splx(s);
+}
+
+void
+lfs_readlockmeta(struct vnode *vp)
+{
+ struct inode *ip = VTOI(vp);
+ int s = splbio();
+
+ KASSERT(curproc != NULL);
+
+ while (ip->i_lfs_writecount) {
+ ltsleep(&ip->i_lfs_writecount, PINOD, "lfsmetaread", 0, 0);
+ }
+ KASSERT(ip->i_lfs_writerpid == NO_PID);
+ ip->i_lfs_readcount++;
+
+ splx(s);
+}
+
+/*
+ * lfs_readunlockmeta can be called in interrupt context.
+ */
+void
+lfs_readunlockmeta(struct vnode *vp)
+{
+ struct inode *ip = VTOI(vp);
+
+ KASSERT(ip->i_lfs_readcount > 0);
+ KASSERT(ip->i_lfs_writecount == 0);
+ KASSERT(ip->i_lfs_writerpid == NO_PID);
+
+ if ((--ip->i_lfs_readcount) == 0)
+ wakeup(&ip->i_lfs_writecount);
+}
+
+void
+lfs_initmetalock(struct vnode *vp)
+{
+ struct inode *ip = VTOI(vp);
+
+ ip->i_lfs_readcount = 0;
+ ip->i_lfs_writecount = 0;
+ ip->i_lfs_writerpid = NO_PID;
}
Index: lfs/lfs_extern.h
===================================================================
RCS file: /cvs/NetBSD/syssrc/sys/ufs/lfs/lfs_extern.h,v
retrieving revision 1.32
diff -u -p -r1.32 lfs_extern.h
--- lfs/lfs_extern.h 2002/12/01 00:12:11 1.32
+++ lfs/lfs_extern.h 2002/12/15 13:25:30
@@ -136,6 +136,11 @@ struct buf *lfs_newbuf(struct lfs *, str
#endif
void lfs_countlocked(int *, long *, char *);
int lfs_reserve(struct lfs *, struct vnode *, int);
+void lfs_lockmeta(struct vnode *);
+void lfs_unlockmeta(struct vnode *);
+void lfs_readlockmeta(struct vnode *);
+void lfs_readunlockmeta(struct vnode *);
+void lfs_initmetalock(struct vnode *);
/* lfs_cksum.c */
u_int32_t cksum(void *, size_t);
@@ -178,6 +183,7 @@ void lfs_shellsort(struct buf **, ufs_da
int lfs_vref(struct vnode *);
void lfs_vunref(struct vnode *);
void lfs_vunref_head(struct vnode *);
+void lfs_generic_callback(struct buf *, void (*)(struct buf *));
/* lfs_subr.c */
void lfs_seglock(struct lfs *, unsigned long);
@@ -235,6 +241,7 @@ int lfs_write (void *);
int lfs_whiteout (void *);
int lfs_getpages (void *);
int lfs_putpages (void *);
+int lfs_strategy (void *);
__END_DECLS
extern int lfs_mount_type;
Index: lfs/lfs_inode.c
===================================================================
RCS file: /cvs/NetBSD/syssrc/sys/ufs/lfs/lfs_inode.c,v
retrieving revision 1.60
diff -u -p -r1.60 lfs_inode.c
--- lfs/lfs_inode.c 2002/09/27 15:38:05 1.60
+++ lfs/lfs_inode.c 2002/12/15 13:25:30
@@ -187,6 +187,7 @@ lfs_update(void *v)
/* If sync, push back the vnode and any dirty blocks it may have. */
if ((ap->a_flags & (UPDATE_WAIT|UPDATE_DIROP)) == UPDATE_WAIT) {
+ int error;
/* Avoid flushing VDIROP. */
++fs->lfs_diropwait;
while (vp->v_flag & VDIROP) {
@@ -205,7 +206,8 @@ lfs_update(void *v)
twice? */
}
--fs->lfs_diropwait;
- return lfs_vflush(vp);
+ error = lfs_vflush(vp);
+ return error;
}
return 0;
}
@@ -379,6 +381,7 @@ lfs_truncate(void *v)
* freeing blocks. lastiblock values are also normalized to -1
* for calls to lfs_indirtrunc below.
*/
+ LFS_LOCK_META(ovp);
memcpy((caddr_t)newblks, (caddr_t)&oip->i_ffs_db[0], sizeof newblks);
for (level = TRIPLE; level >= SINGLE; level--)
if (lastiblock[level] < 0) {
@@ -497,6 +500,7 @@ done:
oip->i_lfs_effnblks -= blocksreleased;
oip->i_ffs_blocks -= real_released;
fs->lfs_bfree += blocksreleased;
+ LFS_UNLOCK_META(ovp);
#ifdef DIAGNOSTIC
if (oip->i_ffs_size == 0 && oip->i_ffs_blocks != 0) {
printf("lfs_truncate: truncate to 0 but %d blocks on inode\n",
@@ -584,6 +588,8 @@ lfs_indirtrunc(struct inode *ip, ufs_dad
int nblocks, blocksreleased = 0, real_released = 0;
int error = 0, allerror = 0;
+ LFS_ASSERT_LOCK_META(ITOV(ip));
+
/*
* Calculate index in current block of last
* block to be kept. -1 indicates the entire
@@ -706,6 +712,7 @@ lfs_vtruncbuf(struct vnode *vp, daddr_t
int s, error;
struct lfs *fs;
+ LFS_ASSERT_LOCK_META(vp);
fs = VTOI(vp)->i_lfs;
s = splbio();
Index: lfs/lfs_segment.c
===================================================================
RCS file: /cvs/NetBSD/syssrc/sys/ufs/lfs/lfs_segment.c,v
retrieving revision 1.84
diff -u -p -r1.84 lfs_segment.c
--- lfs/lfs_segment.c 2002/12/13 14:40:02 1.84
+++ lfs/lfs_segment.c 2002/12/15 13:25:30
@@ -109,7 +109,6 @@ __KERNEL_RCSID(0, "$NetBSD: lfs_segment.
extern int count_lock_queue(void);
extern struct simplelock vnode_free_list_slock; /* XXX */
-static void lfs_generic_callback(struct buf *, void (*)(struct buf *));
static void lfs_super_aiodone(struct buf *);
static void lfs_cluster_aiodone(struct buf *);
static void lfs_cluster_callback(struct buf *);
@@ -232,8 +231,7 @@ lfs_vflush(struct vnode *vp)
{
fs->lfs_avail += btofsb(fs, bp->b_bcount);
wakeup(&fs->lfs_avail);
- lfs_freebuf(bp);
- bp = NULL;
+ (*bp->b_iodone)(bp);
break;
}
}
@@ -270,8 +268,7 @@ lfs_vflush(struct vnode *vp)
/* Copied from lfs_writeseg */
if (bp->b_flags & B_CALL) {
/* if B_CALL, it was created with newbuf */
- lfs_freebuf(bp);
- bp = NULL;
+ (*bp->b_iodone)(bp);
} else {
bremfree(bp);
LFS_UNLOCK_BUF(bp);
@@ -404,6 +401,10 @@ lfs_writevnodes(struct lfs *fs, struct m
printf("lfs_writevnodes: starting over\n");
goto loop;
}
+
+ if (vp->v_type == VNON) {
+ continue;
+ }
ip = VTOI(vp);
if ((op == VN_DIROP && !(vp->v_flag & VDIROP)) ||
@@ -416,10 +417,6 @@ lfs_writevnodes(struct lfs *fs, struct m
vndebug(vp,"empty");
continue;
}
-
- if (vp->v_type == VNON) {
- continue;
- }
if (op == VN_CLEAN && ip->i_number != LFS_IFILE_INUM
&& vp != fs->lfs_flushvp
@@ -434,20 +431,8 @@ lfs_writevnodes(struct lfs *fs, struct m
}
needs_unlock = 0;
- if (VOP_ISLOCKED(vp)) {
- if (vp != fs->lfs_ivnode &&
- vp->v_lock.lk_lockholder != curproc->p_pid) {
-#ifdef DEBUG_LFS
- printf("lfs_writevnodes: not writing ino %d,"
- " locked by pid %d\n",
- VTOI(vp)->i_number,
- vp->v_lock.lk_lockholder);
-#endif
- lfs_vunref(vp);
- continue;
- }
- } else if (vp != fs->lfs_ivnode) {
- vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
+ if (vp != fs->lfs_ivnode) {
+ LFS_LOCK_META(vp);
needs_unlock = 1;
}
@@ -481,8 +466,9 @@ lfs_writevnodes(struct lfs *fs, struct m
inodes_written++;
}
- if (needs_unlock)
- VOP_UNLOCK(vp, 0);
+ if (needs_unlock) {
+ LFS_UNLOCK_META(vp);
+ }
if (lfs_clean_vnhead && only_cleaning)
lfs_vunref_head(vp);
@@ -642,7 +628,7 @@ lfs_segwrite(struct mount *mp, int flags
fs->lfs_flags &= ~LFS_IFDIRTY;
ip = VTOI(vp);
- /* if (LIST_FIRST(&vp->v_dirtyblkhd) != NULL) */
+ if (LIST_FIRST(&vp->v_dirtyblkhd) != NULL)
lfs_writefile(fs, sp, vp);
if (ip->i_flag & IN_ALLMOD)
++did_ckp;
@@ -787,6 +773,11 @@ lfs_writefile(struct lfs *fs, struct seg
if (frag > 1)
panic("lfs_writefile: more than one fragment!");
#endif
+#ifdef DEBUG_LFS
+ if (frag != 0 && ip->i_ffs_size > fs->lfs_bsize * NDADDR)
+ printf("lfs_writefile: not writing indirect blocks for ino %d due to "
+ "%d fragments, size=%lu\n", ip->i_number, frag, (ulong)ip->i_ffs_size);
+#endif
if (IS_FLUSHING(fs, vp) ||
(frag == 0 && (lfs_writeindir || (sp->seg_flags & SEGM_CKP)))) {
lfs_gather(fs, sp, vp, lfs_match_indir);
@@ -919,9 +910,12 @@ lfs_writeinode(struct lfs *fs, struct se
#endif
}
- if (ip->i_number == LFS_IFILE_INUM) /* We know sp->idp == NULL */
+ if (ip->i_number == LFS_IFILE_INUM) {
+ /* We know sp->idp == NULL */
+ KASSERT(sp->idp == NULL);
sp->idp = ((struct dinode *)bp->b_data) +
(sp->ninodes % INOPB(fs));
+ }
if (gotblk) {
LFS_LOCK_BUF(bp);
brelse(bp);
@@ -1170,6 +1164,7 @@ lfs_updatemeta(struct segment *sp)
if (vp == NULL || nblocks == 0)
return;
+ LFS_LOCK_META(vp);
/* Sort the blocks. */
/*
* XXX KS - We have to sort even if the blocks come from the
@@ -1333,6 +1328,7 @@ lfs_updatemeta(struct segment *sp)
if (lbn >= 0 && lbn < NDADDR)
ip->i_lfs_fragsize[lbn] = sbp->b_bcount;
}
+ LFS_UNLOCK_META(vp);
}
/*
@@ -1618,7 +1614,7 @@ lfs_writeseg(struct lfs *fs, struct segm
*/
if ((nblocks = sp->cbpp - sp->bpp) == 1)
return (0);
-
+
i_dev = VTOI(fs->lfs_ivnode)->i_dev;
devvp = VTOI(fs->lfs_ivnode)->i_devvp;
@@ -1725,8 +1721,7 @@ lfs_writeseg(struct lfs *fs, struct segm
if (changed) {
bp->b_flags &= ~(B_ERROR | B_GATHERED);
if (bp->b_flags & B_CALL) {
- lfs_freebuf(bp);
- bp = NULL;
+ (*bp->b_iodone)(bp);
} else {
/* Still on free list, leave it there */
s = splbio();
@@ -1746,8 +1741,7 @@ lfs_writeseg(struct lfs *fs, struct segm
bp->b_flags &= ~(B_ERROR | B_READ | B_DELWRI |
B_GATHERED);
if (bp->b_flags & B_CALL) {
- lfs_freebuf(bp);
- bp = NULL;
+ (*bp->b_iodone)(bp);
} else {
bremfree(bp);
bp->b_flags |= B_DONE;
@@ -2085,7 +2079,7 @@ lfs_match_data(struct lfs *fs, struct bu
int
lfs_match_indir(struct lfs *fs, struct buf *bp)
{
- int lbn;
+ daddr_t lbn;
lbn = bp->b_lblkno;
return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 0);
@@ -2094,7 +2088,7 @@ lfs_match_indir(struct lfs *fs, struct b
int
lfs_match_dindir(struct lfs *fs, struct buf *bp)
{
- int lbn;
+ daddr_t lbn;
lbn = bp->b_lblkno;
return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 1);
@@ -2103,7 +2097,7 @@ lfs_match_dindir(struct lfs *fs, struct
int
lfs_match_tindir(struct lfs *fs, struct buf *bp)
{
- int lbn;
+ daddr_t lbn;
lbn = bp->b_lblkno;
return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 2);
@@ -2261,7 +2255,7 @@ lfs_cluster_aiodone(struct buf *bp)
free(cl, M_SEGMENT);
}
-static void
+void
lfs_generic_callback(struct buf *bp, void (*aiodone)(struct buf *))
{
/* reset b_iodone for when this is a single-buf i/o. */
Index: lfs/lfs_syscalls.c
===================================================================
RCS file: /cvs/NetBSD/syssrc/sys/ufs/lfs/lfs_syscalls.c,v
retrieving revision 1.73
diff -u -p -r1.73 lfs_syscalls.c
--- lfs/lfs_syscalls.c 2002/11/24 16:39:13 1.73
+++ lfs/lfs_syscalls.c 2002/12/15 13:25:30
@@ -94,8 +94,9 @@ __KERNEL_RCSID(0, "$NetBSD: lfs_syscalls
#include <ufs/lfs/lfs_extern.h>
/* Flags for return from lfs_fastvget */
-#define FVG_UNLOCK 0x01 /* Needs to be unlocked */
-#define FVG_PUT 0x02 /* Needs to be vput() */
+#define FVG_UNLOCK 0x01 /* Needs to be VOP_UNLOCK'ed */
+#define FVG_PUT 0x02 /* Needs to be vput() */
+#define FVG_METAUNLOCK 0x04 /* Needs to be LFS_READ_UNLOCK_META'ed */
/* Max block count for lfs_markv() */
#define MARKV_MAXBLKCNT 65536
@@ -128,7 +129,9 @@ extern TAILQ_HEAD(bqueues, buf) bufqueue
static int lfs_bmapv(struct proc *, fsid_t *, BLOCK_INFO *, int);
static int lfs_markv(struct proc *, fsid_t *, BLOCK_INFO *, int);
+#if 0
static void lfs_fakebuf_iodone(struct buf *);
+#endif
/*
* sys_lfs_markv:
@@ -259,7 +262,7 @@ lfs_markv(struct proc *p, fsid_t *fsidp,
#ifdef CHECK_COPYIN
int i;
#endif /* CHECK_COPYIN */
- int numlocked = 0, numrefed = 0;
+ int numlocked = 0, numrefed = 0, nummetalocked = 0;
ino_t maxino;
size_t obsize;
@@ -329,7 +332,11 @@ lfs_markv(struct proc *p, fsid_t *fsidp,
if (ip->i_flag & (IN_MODIFIED|IN_CLEANING))
iwritten++;
#endif
- if (lfs_fastvget_unlock) {
+ if (lfs_fastvget_unlock & FVG_METAUNLOCK) {
+ LFS_READ_UNLOCK_META(vp);
+ nummetalocked--;
+ }
+ if (lfs_fastvget_unlock & FVG_UNLOCK) {
VOP_UNLOCK(vp, 0);
numlocked--;
}
@@ -368,8 +375,12 @@ lfs_markv(struct proc *p, fsid_t *fsidp,
? blkp->bi_bp
: NULL),
&lfs_fastvget_unlock);
- if (lfs_fastvget_unlock)
- numlocked++;
+ if (lfs_fastvget_unlock & FVG_METAUNLOCK)
+ nummetalocked++;
+ if (lfs_fastvget_unlock & FVG_UNLOCK) {
+ lfs_fastvget_unlock &= ~FVG_UNLOCK;
+ VOP_UNLOCK(vp, 0);
+ }
if (!error) {
numrefed++;
@@ -493,6 +504,7 @@ lfs_markv(struct proc *p, fsid_t *fsidp,
" size=%d\n", blkp->bi_size);
bp = getblk(vp, blkp->bi_lbn, blkp->bi_size, 0, 0);
if (!(bp->b_flags & (B_DONE|B_DELWRI))) { /* B_CACHE */
+ KASSERT(!(bp->b_flags & B_LOCKED));
/*
* The block in question was not found
* in the cache; i.e., the block that
@@ -522,13 +534,25 @@ lfs_markv(struct proc *p, fsid_t *fsidp,
if (ip->i_flag & (IN_MODIFIED|IN_CLEANING))
iwritten++;
#endif
- if (lfs_fastvget_unlock) {
+ if (lfs_fastvget_unlock & FVG_METAUNLOCK) {
+ LFS_READ_UNLOCK_META(vp);
+ nummetalocked--;
+ }
+ if (lfs_fastvget_unlock & FVG_UNLOCK) {
VOP_UNLOCK(vp, 0);
numlocked--;
}
lfs_vunref(vp);
numrefed--;
}
+
+#ifdef DEBUG_LFS
+ printf("%d]",iwritten);
+ if (numlocked != 0 || numrefed != 0 || nummetalocked != 0) {
+ panic("lfs_markv: numlocked=%d numrefed=%d nummetalocked = %d",
+ numlocked, numrefed, nummetalocked);
+ }
+#endif
/*
* The last write has to be SEGM_SYNC, because of calling semantics.
@@ -539,13 +563,6 @@ lfs_markv(struct proc *p, fsid_t *fsidp,
lfs_segwrite(mntp, SEGM_CLEAN | SEGM_CKP | SEGM_SYNC);
lfs_segunlock(fs);
-
-#ifdef DEBUG_LFS
- printf("%d]",iwritten);
- if (numlocked != 0 || numrefed != 0) {
- panic("lfs_markv: numlocked=%d numrefed=%d", numlocked, numrefed);
- }
-#endif
vfs_unbusy(mntp);
if (error)
@@ -556,13 +573,18 @@ lfs_markv(struct proc *p, fsid_t *fsidp,
return 0;
err2:
+ KASSERT(v_daddr != LFS_UNUSED_DADDR);
printf("lfs_markv err2\n");
- if (lfs_fastvget_unlock) {
+ if (lfs_fastvget_unlock & FVG_METAUNLOCK) {
+ LFS_READ_UNLOCK_META(vp);
+ nummetalocked--;
+ }
+ if (lfs_fastvget_unlock & FVG_UNLOCK) {
VOP_UNLOCK(vp, 0);
- --numlocked;
+ numlocked--;
}
lfs_vunref(vp);
- --numrefed;
+ numrefed--;
/* Free up fakebuffers -- have to take these from the LOCKED list */
again:
@@ -588,8 +610,9 @@ lfs_markv(struct proc *p, fsid_t *fsidp,
lfs_segunlock(fs);
vfs_unbusy(mntp);
#ifdef DEBUG_LFS
- if (numlocked != 0 || numrefed != 0) {
- panic("lfs_markv: numlocked=%d numrefed=%d", numlocked, numrefed);
+ if (numlocked != 0 || numrefed != 0 || nummetalocked != 0) {
+ panic("lfs_markv: numlocked=%d numrefed=%d nummetalocked=%d",
+ numlocked, numrefed, nummetalocked);
}
#endif
@@ -711,7 +734,7 @@ lfs_bmapv(struct proc *p, fsid_t *fsidp,
ino_t lastino;
ufs_daddr_t v_daddr;
int cnt, error, need_unlock = 0;
- int numlocked = 0, numrefed = 0;
+ int numlocked = 0, numrefed = 0, nummetalocked = 0;
lfs_cleaner_pid = p->p_pid;
@@ -744,7 +767,11 @@ lfs_bmapv(struct proc *p, fsid_t *fsidp,
* v_daddr.
*/
if (v_daddr != LFS_UNUSED_DADDR) {
- if (need_unlock) {
+ if (need_unlock & FVG_METAUNLOCK) {
+ LFS_READ_UNLOCK_META(vp);
+ nummetalocked--;
+ }
+ if (need_unlock & FVG_UNLOCK) {
VOP_UNLOCK(vp, 0);
numlocked--;
}
@@ -773,27 +800,15 @@ lfs_bmapv(struct proc *p, fsid_t *fsidp,
*/
vp = ufs_ihashlookup(ump->um_dev, blkp->bi_inode);
if (vp != NULL && !(vp->v_flag & VXLOCK)) {
- ip = VTOI(vp);
if (lfs_vref(vp)) {
v_daddr = LFS_UNUSED_DADDR;
need_unlock = 0;
continue;
}
numrefed++;
- if (VOP_ISLOCKED(vp)) {
-#ifdef DEBUG_LFS
- printf("lfs_bmapv: inode %d inlocked\n",ip->i_number);
-#endif
- v_daddr = LFS_UNUSED_DADDR;
- need_unlock = 0;
- lfs_vunref(vp);
- --numrefed;
- continue;
- } else {
- vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
- need_unlock = FVG_UNLOCK;
- numlocked++;
- }
+ LFS_READ_LOCK_META(vp);
+ nummetalocked++;
+ need_unlock = FVG_METAUNLOCK;
} else {
error = VFS_VGET(mntp, blkp->bi_inode, &vp);
if (error) {
@@ -804,8 +819,10 @@ lfs_bmapv(struct proc *p, fsid_t *fsidp,
need_unlock = 0;
continue;
} else {
- need_unlock = FVG_PUT;
- numlocked++;
+ LFS_READ_LOCK_META(vp);
+ VOP_UNLOCK(vp, 0);
+ need_unlock = FVG_METAUNLOCK;
+ nummetalocked++;
numrefed++;
}
}
@@ -855,7 +872,11 @@ lfs_bmapv(struct proc *p, fsid_t *fsidp,
* of a usable vnode in vp is signaled by a valid v_daddr.
*/
if (v_daddr != LFS_UNUSED_DADDR) {
- if (need_unlock) {
+ if (need_unlock & FVG_METAUNLOCK) {
+ LFS_READ_UNLOCK_META(vp);
+ nummetalocked--;
+ }
+ if (need_unlock & FVG_UNLOCK) {
VOP_UNLOCK(vp, 0);
numlocked--;
}
@@ -863,9 +884,9 @@ lfs_bmapv(struct proc *p, fsid_t *fsidp,
numrefed--;
}
- if (numlocked != 0 || numrefed != 0) {
- panic("lfs_bmapv: numlocked=%d numrefed=%d", numlocked,
- numrefed);
+ if (numlocked != 0 || numrefed != 0 || nummetalocked != 0) {
+ panic("lfs_bmapv: numlocked=%d numrefed=%d nummetalocked=%d",
+ numlocked, numrefed, nummetalocked);
}
vfs_unbusy(mntp);
@@ -1051,7 +1072,6 @@ extern struct lock ufs_hashlock;
int
lfs_fasthashget(dev_t dev, ino_t ino, int *need_unlock, struct vnode **vpp)
{
- struct inode *ip;
/*
* This is playing fast and loose. Someone may have the inode
@@ -1067,25 +1087,13 @@ lfs_fasthashget(dev_t dev, ino_t ino, in
return EAGAIN;
#endif
}
- ip = VTOI(*vpp);
if (lfs_vref(*vpp)) {
clean_inlocked++;
return EAGAIN;
}
- if (VOP_ISLOCKED(*vpp)) {
-#ifdef DEBUG_LFS
- printf("lfs_fastvget: ino %d inlocked by pid %d\n",
- ip->i_number, (*vpp)->v_lock.lk_lockholder);
-#endif
- clean_inlocked++;
-#ifdef LFS_EAGAIN_FAIL
- lfs_vunref(*vpp);
- return EAGAIN;
-#endif /* LFS_EAGAIN_FAIL */
- } else {
- vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY);
- *need_unlock |= FVG_UNLOCK;
- }
+ KASSERT((*need_unlock & FVG_METAUNLOCK) == 0);
+ LFS_READ_LOCK_META(*vpp);
+ *need_unlock |= FVG_METAUNLOCK;
} else
*vpp = NULL;
@@ -1242,7 +1250,11 @@ lfs_fastvget(struct mount *mp, ino_t ino
ip->i_devvp = ump->um_devvp;
VREF(ip->i_devvp);
*vpp = vp;
- *need_unlock |= FVG_PUT;
+ LFS_READ_LOCK_META(vp);
+ *need_unlock |= FVG_METAUNLOCK;
+
+ KASSERT(VOP_ISLOCKED(vp));
+ *need_unlock |= FVG_UNLOCK;
uvm_vnp_setsize(vp, ip->i_ffs_size);
@@ -1283,6 +1295,8 @@ lfs_fakebuf(struct lfs *fs, struct vnode
bp = lfs_newbuf(VTOI(vp)->i_lfs, vp, lbn, size);
error = copyin(uaddr, bp->b_data, size);
if (error) {
+ panic("lfs_newbuf: copyin failed: uaddr=%p, error=%d\n",
+ (void *)uaddr, error);
lfs_freebuf(bp);
return NULL;
}
Index: lfs/lfs_vnops.c
===================================================================
RCS file: /cvs/NetBSD/syssrc/sys/ufs/lfs/lfs_vnops.c,v
retrieving revision 1.69
diff -u -p -r1.69 lfs_vnops.c
--- lfs/lfs_vnops.c 2002/11/24 08:23:41 1.69
+++ lfs/lfs_vnops.c 2002/12/15 13:25:30
@@ -138,7 +138,7 @@ const struct vnodeopv_entry_desc lfs_vno
{ &vop_lock_desc, ufs_lock }, /* lock */
{ &vop_unlock_desc, ufs_unlock }, /* unlock */
{ &vop_bmap_desc, ufs_bmap }, /* bmap */
- { &vop_strategy_desc, ufs_strategy }, /* strategy */
+ { &vop_strategy_desc, lfs_strategy }, /* strategy */
{ &vop_print_desc, ufs_print }, /* print */
{ &vop_islocked_desc, ufs_islocked }, /* islocked */
{ &vop_pathconf_desc, ufs_pathconf }, /* pathconf */
@@ -339,6 +339,16 @@ lfs_inactive(void *v)
} */ *ap = v;
struct inode *ip = VTOI(ap->a_vp);
+ /*
+ * wait until meta locks go away.
+ */
+ while (ip->i_lfs_writecount || ip->i_lfs_readcount) {
+ printf("lfs_inactive: waiting for vp=%p, ino=%d\n",
+ ap->a_vp, ip->i_number);
+ ltsleep(&ip->i_lfs_writecount, PVFS, "lfs_inactive", 0, 0);
+ }
+ KASSERT(ip->i_lfs_writerpid == NO_PID);
+
if (ip->i_flag & IN_ADIROP)
--ip->i_lfs->lfs_nadirop;
ip->i_flag &= ~IN_ADIROP;
@@ -367,6 +377,8 @@ lfs_set_dirop(struct vnode *vp)
struct lfs *fs;
int error;
+ LFS_ASSERT_UNLOCK_META(vp);
+
fs = VTOI(vp)->i_lfs;
/*
* We might need one directory block plus supporting indirect blocks,
@@ -977,4 +989,104 @@ lfs_putpages(void *v)
error = genfs_putpages(v);
return error;
+}
+
+static void
+lfs_strategy_iodone(struct buf *bp)
+{
+ struct vnode *vp = bp->b_vp;
+
+ KASSERT(!(bp->b_flags & B_CALL));
+ KASSERT(bp->b_flags & B_BUSY);
+ KASSERT(vp->v_tag == VT_LFS);
+
+ LFS_READ_UNLOCK_META(vp);
+
+ /* followings are copied from biodone */
+ if (bp->b_flags & B_ASYNC) /* if async, release */
+ brelse(bp);
+ else { /* or just wakeup the buffer */
+ bp->b_flags &= ~B_WANTED;
+ wakeup(bp);
+ }
+}
+
+/*
+ * Calculate the logical to physical mapping if not done already,
+ * then call the device strategy routine.
+ */
+int
+lfs_strategy(void *v)
+{
+ struct vop_strategy_args /* {
+ struct buf *a_bp;
+ } */ *ap = v;
+ struct buf *bp;
+ struct vnode *vp;
+ struct inode *ip;
+ int error;
+ int needunlock;
+
+ bp = ap->a_bp;
+ vp = bp->b_vp;
+ ip = VTOI(vp);
+ if (vp->v_type == VBLK || vp->v_type == VCHR)
+ panic("lfs_strategy: spec");
+ KASSERT(bp->b_bcount != 0);
+
+ /*
+ * lfs uses strategy routine only for read.
+ */
+ KASSERT(bp->b_flags & B_READ);
+
+ if (LFS_OWN_LOCK_META(vp)) {
+ KASSERT(!(bp->b_flags & B_ASYNC));
+ /*
+ * if we're holding write lock,
+ * no need to acquire read lock.
+ * XXX ugly
+ */
+ needunlock = 0;
+ }
+ else {
+ LFS_READ_LOCK_META(vp);
+ needunlock = 1;
+ }
+ if (bp->b_lblkno == bp->b_blkno) {
+ error = VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL);
+ if (error) {
+ bp->b_error = error;
+ bp->b_flags |= B_ERROR;
+ goto done;
+ }
+ }
+ if ((long)bp->b_blkno == -1) /* no valid data */
+ clrbuf(bp);
+ if ((long)bp->b_blkno < 0) { /* block is not on disk */
+ error = 0;
+ goto done;
+ }
+ if (needunlock) {
+ /*
+ * aiodone routine will unlock metadata.
+ */
+ KASSERT((bp->b_flags & B_CALL) == 0);
+ bp->b_flags |= B_CALL;
+ bp->b_iodone = lfs_strategy_iodone;
+ /*
+ * pass lock to aiodoned.
+ */
+ LFS_PASS_METALOCK_TO_AIODONED(vp);
+ }
+ vp = ip->i_devvp;
+ bp->b_dev = vp->v_rdev;
+ VOCALL (vp->v_op, VOFFSET(vop_strategy), ap);
+ return (0);
+
+done:
+ if (needunlock) {
+ LFS_READ_UNLOCK_META(vp);
+ }
+ biodone(bp);
+ return (error);
}
--NextPart-20021215224130-0409300--