Subject: Re: NEW_BUFQ_STRATEGY
To: None <chuq@chuq.com>
From: YAMAMOTO Takashi <yamt@mwd.biglobe.ne.jp>
List: tech-kern
Date: 12/04/2003 10:38:26
--NextPart-20031204103217-0108700
Content-Type: Text/Plain; charset=us-ascii
hi,
> On Sun, Nov 30, 2003 at 10:33:47AM -0800, Jason Thorpe wrote:
> >
> > On Nov 28, 2003, at 2:10 PM, Chuck Silvers wrote:
> >
> > >has anyone had any problems with NEW_BUFQ_STRATEGY lately?
> > >is there consensus on whether or not this is ready to become the
> > >default?
> >
> > I think the consensus is that it's stable, but I heard that it can have
> > some pretty horrible performance effects with some file systems if
> > they're exported by NFS.
>
> I tried bonnie++ over NFS, and NEW_BUFQ_STRATEGY didn't affect the results
> significantly. if this effect really does exist, I don't know how to
> trigger it.
perhaps it is because synchronous writes, which nfsd often requests,
easily suffer under the new strategy? i guess that explicitly
distinguishing sync/async requests, as the attached diff does,
would improve the situation.
YAMAMOTO Takashi
--NextPart-20031204103217-0108700
Content-Type: Text/Plain; charset=us-ascii
Content-Disposition: attachment; filename="bprio.diff"
Index: miscfs/genfs/genfs_vnops.c
===================================================================
--- miscfs/genfs/genfs_vnops.c (revision 444)
+++ miscfs/genfs/genfs_vnops.c (working copy)
@@ -822,6 +822,10 @@ genfs_getpages(void *v)
"bp %p offset 0x%x bcount 0x%x blkno 0x%x",
bp, offset, iobytes, bp->b_blkno);
+ if (async)
+ BIO_SETPRIO(bp, BPRIO_TIMELIMITED);
+ else
+ BIO_SETPRIO(bp, BPRIO_TIMECRITICAL);
VOP_STRATEGY(bp);
}
@@ -1489,6 +1493,10 @@ genfs_gop_write(struct vnode *vp, struct
UVMHIST_LOG(ubchist,
"vp %p offset 0x%x bcount 0x%x blkno 0x%x",
vp, offset, bp->b_bcount, bp->b_blkno);
+ if (async)
+ BIO_SETPRIO(bp, BPRIO_TIMELIMITED); /* XXXnot exactly */
+ else
+ BIO_SETPRIO(bp, BPRIO_TIMECRITICAL);
VOP_STRATEGY(bp);
}
if (skipbytes) {
Index: ufs/ufs/ufs_bmap.c
===================================================================
--- ufs/ufs/ufs_bmap.c (revision 266)
+++ ufs/ufs/ufs_bmap.c (working copy)
@@ -243,6 +243,7 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, run
trace(TR_BREADMISS, pack(vp, size), metalbn);
bp->b_blkno = blkptrtodb(ump, daddr);
bp->b_flags |= B_READ;
+ BIO_SETPRIO(bp, BPRIO_TIMECRITICAL);
VOP_STRATEGY(bp);
curproc->p_stats->p_ru.ru_inblock++; /* XXX */
if ((error = biowait(bp)) != 0) {
Index: ufs/ffs/ffs_inode.c
===================================================================
--- ufs/ffs/ffs_inode.c (revision 266)
+++ ufs/ffs/ffs_inode.c (working copy)
@@ -563,6 +563,7 @@ ffs_indirtrunc(ip, lbn, dbn, lastbn, lev
if (bp->b_bcount > bp->b_bufsize)
panic("ffs_indirtrunc: bad buffer size");
bp->b_blkno = dbn;
+ BIO_SETPRIO(bp, BPRIO_TIMECRITICAL);
VOP_STRATEGY(bp);
error = biowait(bp);
}
Index: kern/vfs_bio.c
===================================================================
--- kern/vfs_bio.c (revision 413)
+++ kern/vfs_bio.c (working copy)
@@ -272,6 +272,10 @@ bio_doread(vp, blkno, size, cred, async)
if (!ISSET(bp->b_flags, (B_DONE | B_DELWRI))) {
/* Start I/O for the buffer. */
SET(bp->b_flags, B_READ | async);
+ if (async)
+ BIO_SETPRIO(bp, BPRIO_TIMELIMITED);
+ else
+ BIO_SETPRIO(bp, BPRIO_TIMECRITICAL);
VOP_STRATEGY(bp);
/* Pay for the read. */
@@ -426,6 +430,11 @@ bwrite(bp)
simple_unlock(&bp->b_interlock);
splx(s);
+ if (sync)
+ BIO_SETPRIO(bp, BPRIO_TIMECRITICAL);
+ else
+ BIO_SETPRIO(bp, BPRIO_TIMELIMITED);
+
VOP_STRATEGY(bp);
if (sync) {
@@ -757,6 +766,7 @@ start:
} else {
allocbuf(bp, size);
}
+ BIO_SETPRIO(bp, BPRIO_DEFAULT);
return (bp);
}
@@ -781,6 +791,7 @@ geteblk(size)
simple_unlock(&bp->b_interlock);
splx(s);
allocbuf(bp, size);
+ BIO_SETPRIO(bp, BPRIO_DEFAULT);
return (bp);
}
@@ -1019,6 +1030,7 @@ biodone(bp)
if (ISSET(bp->b_flags, B_DONE))
panic("biodone already");
SET(bp->b_flags, B_DONE); /* note that it's done */
+ BIO_SETPRIO(bp, BPRIO_DEFAULT);
if (LIST_FIRST(&bp->b_dep) != NULL && bioops.io_complete)
(*bioops.io_complete)(bp);
Index: kern/subr_disk.c
===================================================================
--- kern/subr_disk.c (revision 283)
+++ kern/subr_disk.c (working copy)
@@ -679,7 +679,7 @@ bufq_disksort_get(struct bufq_state *buf
return (bp);
}
-
+int btypes[2][2][2];
/*
* Seek sort for disks.
*
@@ -699,10 +699,16 @@ bufq_prio_put(struct bufq_state *bufq, s
sortby = bufq->bq_flags & BUFQ_SORT_MASK;
+ btypes[bp->b_prio][(bp->b_flags & B_READ) ? 0 : 1]
+ [(bp->b_flags & B_ASYNC) ? 0 : 1]++;
/*
* If it's a read request append it to the list.
*/
+#if 0
if ((bp->b_flags & B_READ) == B_READ) {
+#else
+ if (bp->b_prio > BPRIO_DEFAULT) {
+#endif
TAILQ_INSERT_TAIL(&prio->bq_read, bp, b_actq);
return;
}
@@ -797,7 +803,11 @@ bufq_prio_get(struct bufq_state *bufq, i
bp = prio->bq_next;
if (bp != NULL && remove) {
+#if 0
if ((bp->b_flags & B_READ) == B_READ) {
+#else
+ if (bp->b_prio > BPRIO_DEFAULT) {
+#endif
TAILQ_REMOVE(&prio->bq_read, bp, b_actq);
bufq_cluster_stat(bp, TAILQ_FIRST(&prio->bq_read));
} else {
Index: kern/kern_physio.c
===================================================================
--- kern/kern_physio.c (revision 283)
+++ kern/kern_physio.c (working copy)
@@ -208,6 +208,8 @@ physio(strategy, bp, dev, flags, minphys
}
vmapbuf(bp, todo);
+ BIO_SETPRIO(bp, BPRIO_TIMECRITICAL);
+
/* [call strategy to start the transfer] */
(*strategy)(bp);
Index: sys/buf.h
===================================================================
--- sys/buf.h (revision 284)
+++ sys/buf.h (working copy)
@@ -156,6 +156,7 @@ struct buf {
struct proc *b_proc; /* Associated proc if B_PHYS set. */
volatile long b_flags; /* B_* flags. */
struct simplelock b_interlock; /* Lock for b_flags changes */
+ int b_prio;
int b_error; /* Errno value. */
long b_bufsize; /* Allocated buffer size. */
long b_bcount; /* Valid bytes in buffer. */
@@ -250,6 +251,11 @@ do { \
#ifdef _KERNEL
+#define BIO_SETPRIO(bp, prio) (bp)->b_prio = (prio)
+#define BPRIO_TIMECRITICAL 1
+#define BPRIO_TIMELIMITED 0
+#define BPRIO_DEFAULT 0
+
extern struct bio_ops bioops;
extern u_int nbuf; /* The number of buffer headers */
extern struct buf *buf; /* The buffer headers. */
--NextPart-20031204103217-0108700--