Subject: file overwrite performance after ubc
To: None <tech-kern@netbsd.org>
From: YAMAMOTO Takashi <yamt@mwd.biglobe.ne.jp>
List: tech-kern
Date: 05/21/2007 23:38:22
--NextPart-20070521233348-0389500
Content-Type: Text/Plain; charset=us-ascii
hi,
the attached patch is to improve file overwrite performance in common cases.
it also fixes PR/33152 and PR/36303.
(currently only ufs and nfs are implemented.)
YAMAMOTO Takashi
--NextPart-20070521233348-0389500
Content-Type: Text/Plain; charset=us-ascii
Content-Disposition: attachment; filename="a.diff"
Index: sys/vnode.h
===================================================================
RCS file: /cvsroot/src/sys/sys/vnode.h,v
retrieving revision 1.168
diff -u -p -r1.168 vnode.h
--- sys/vnode.h 8 Apr 2007 11:20:50 -0000 1.168
+++ sys/vnode.h 21 May 2007 14:29:36 -0000
@@ -98,6 +98,7 @@ struct vnode {
#define v_usecount v_uobj.uo_refs
#define v_interlock v_uobj.vmobjlock
voff_t v_size; /* size of file */
+ voff_t v_writesize; /* new size after write */
int v_flag; /* flags */
int v_numoutput; /* number of pending writes */
long v_writecount; /* reference count of writers */
Index: kern/vfs_subr.c
===================================================================
RCS file: /cvsroot/src/sys/kern/vfs_subr.c,v
retrieving revision 1.287
diff -u -p -r1.287 vfs_subr.c
--- kern/vfs_subr.c 16 Apr 2007 05:14:54 -0000 1.287
+++ kern/vfs_subr.c 21 May 2007 14:29:37 -0000
@@ -594,7 +594,7 @@ getnewvnode(enum vtagtype tag, struct mo
KASSERT(uobj->pgops == &uvm_vnodeops);
KASSERT(uobj->uo_npages == 0);
KASSERT(TAILQ_FIRST(&uobj->memq) == NULL);
- vp->v_size = VSIZENOTSET;
+ vp->v_size = vp->v_writesize = VSIZENOTSET;
if (mp && error != EDEADLK)
vfs_unbusy(mp);
@@ -2638,8 +2638,8 @@ vfs_vnode_print(struct vnode *vp, int fu
uvm_object_printit(&vp->v_uobj, full, pr);
bitmask_snprintf(vp->v_flag, vnode_flagbits, bf, sizeof(bf));
(*pr)("\nVNODE flags %s\n", bf);
- (*pr)("mp %p numoutput %d size 0x%llx\n",
- vp->v_mount, vp->v_numoutput, vp->v_size);
+ (*pr)("mp %p numoutput %d size 0x%llx writesize 0x%llx\n",
+ vp->v_mount, vp->v_numoutput, vp->v_size, vp->v_writesize);
(*pr)("data %p usecount %d writecount %ld holdcnt %ld numoutput %d\n",
vp->v_data, vp->v_usecount, vp->v_writecount,
Index: uvm/uvm_bio.c
===================================================================
RCS file: /cvsroot/src/sys/uvm/uvm_bio.c,v
retrieving revision 1.57
diff -u -p -r1.57 uvm_bio.c
--- uvm/uvm_bio.c 7 May 2007 12:39:45 -0000 1.57
+++ uvm/uvm_bio.c 21 May 2007 14:29:37 -0000
@@ -632,6 +632,57 @@ ubc_release(void *va, int flags)
simple_unlock(&ubc_object.uobj.vmobjlock);
}
+/*
+ * ubc_uiomove: move "todo" bytes between uio and uobj, one ubc_alloc()
+ * window at a time, starting at uio->uio_offset.  returns 0 or the first
+ * error from uiomove(); see the UBC_FAULTBUSY/UBC_PARTIALOK handling below.
+ */
+
+int
+ubc_uiomove(struct uvm_object *uobj, struct uio *uio, vsize_t todo, int flags)
+{
+ voff_t off;
+ const bool overwrite = (flags & UBC_FAULTBUSY) != 0;
+ int error;
+
+ KASSERT(todo <= uio->uio_resid);
+ KASSERT(((flags & UBC_WRITE) != 0 && uio->uio_rw == UIO_WRITE) ||
+ ((flags & UBC_READ) != 0 && uio->uio_rw == UIO_READ));
+
+ off = uio->uio_offset;
+ error = 0;
+ while (todo > 0) {
+ vsize_t bytelen = todo;
+ void *win;
+
+ win = ubc_alloc(uobj, off, &bytelen, UVM_ADV_NORMAL, flags); /* bytelen is passed by pointer; presumably clipped to the window -- confirm */
+ if (error == 0) { /* once a copy has failed, no further uiomove() is attempted */
+ error = uiomove(win, bytelen, uio);
+ }
+ if (error != 0 && overwrite) {
+ /*
+ * if we haven't initialized the pages yet,
+ * do it now. it's safe to use memset here
+ * because we just mapped the pages above.
+ */
+ printf("%s: error=%d\n", __func__, error); /* NOTE(review): looks like debug output -- confirm it should stay */
+ memset(win, 0, bytelen);
+ }
+ ubc_release(win, flags);
+ off += bytelen;
+ todo -= bytelen;
+ if (error != 0 && (flags & UBC_PARTIALOK) != 0) { /* without UBC_PARTIALOK the loop keeps walking the rest of todo */
+ break;
+ }
+#if 0
+ if (!overwrite) {
+ break;
+ }
+#endif
+ }
+
+ return error;
+}
#if 0 /* notused */
/*
Index: uvm/uvm_extern.h
===================================================================
RCS file: /cvsroot/src/sys/uvm/uvm_extern.h,v
retrieving revision 1.129
diff -u -p -r1.129 uvm_extern.h
--- uvm/uvm_extern.h 24 Mar 2007 21:15:39 -0000 1.129
+++ uvm/uvm_extern.h 21 May 2007 14:29:37 -0000
@@ -189,14 +189,19 @@ typedef voff_t pgoff_t; /* XXX: number
/*
* flags for ubc_alloc()
*/
-#define UBC_READ 0x01
-#define UBC_WRITE 0x02
-#define UBC_FAULTBUSY 0x04
+#define UBC_READ 0x001
+#define UBC_WRITE 0x002
+#define UBC_FAULTBUSY 0x004
/*
* flags for ubc_release()
*/
-#define UBC_UNMAP 0x01
+#define UBC_UNMAP 0x010
+
+/*
+ * flags for ubc_uiomove()
+ */
+#define UBC_PARTIALOK 0x100
/*
* helpers for calling ubc_release()
@@ -556,6 +561,8 @@ void * ubc_alloc(struct uvm_object *,
int);
void ubc_release(void *, int);
void ubc_flush(struct uvm_object *, voff_t, voff_t);
+int ubc_uiomove(struct uvm_object *, struct uio *, vsize_t,
+ int);
/* uvm_fault.c */
#define uvm_fault(m, a, p) uvm_fault_internal(m, a, p, 0)
@@ -703,6 +710,7 @@ void uvm_deallocate(struct vm_map *, v
/* uvm_vnode.c */
void uvm_vnp_setsize(struct vnode *, voff_t);
+void uvm_vnp_setwritesize(struct vnode *, voff_t);
void uvm_vnp_sync(struct mount *);
struct uvm_object *uvn_attach(void *, vm_prot_t);
int uvn_findpages(struct uvm_object *, voff_t,
Index: uvm/uvm_vnode.c
===================================================================
RCS file: /cvsroot/src/sys/uvm/uvm_vnode.c,v
retrieving revision 1.81
diff -u -p -r1.81 uvm_vnode.c
--- uvm/uvm_vnode.c 4 Mar 2007 06:03:49 -0000 1.81
+++ uvm/uvm_vnode.c 21 May 2007 14:29:37 -0000
@@ -212,7 +212,7 @@ uvn_attach(void *arg, vm_prot_t accesspr
UVMHIST_LOG(maphist,"<- done (VOP_GETATTR FAILED!)", 0,0,0,0);
return(NULL);
}
- vp->v_size = used_vnode_size;
+ vp->v_size = vp->v_writesize = used_vnode_size;
}
@@ -470,14 +470,29 @@ uvm_vnp_setsize(struct vnode *vp, voff_t
*/
oldsize = vp->v_size;
+ KASSERT(oldsize <= vp->v_writesize);
+ KASSERT(oldsize == vp->v_writesize || vp->v_writesize == newsize);
if (oldsize > pgend && oldsize != VSIZENOTSET) {
(void) uvn_put(uobj, pgend, 0, PGO_FREE | PGO_SYNCIO);
simple_lock(&uobj->vmobjlock);
}
- vp->v_size = newsize;
+ vp->v_size = vp->v_writesize = newsize;
simple_unlock(&uobj->vmobjlock);
}
+void
+uvm_vnp_setwritesize(struct vnode *vp, voff_t newsize)
+{
+ /* record newsize in vp->v_writesize while holding v_interlock; v_size is not modified here. */
+ simple_lock(&vp->v_interlock);
+ KASSERT(vp->v_size != VSIZENOTSET); /* both sizes must already have been set */
+ KASSERT(vp->v_writesize != VSIZENOTSET);
+ KASSERT(vp->v_size <= vp->v_writesize);
+ KASSERT(vp->v_size <= newsize); /* the new write size may not be smaller than v_size */
+ vp->v_writesize = newsize;
+ simple_unlock(&vp->v_interlock);
+}
+
/*
* uvm_vnp_zerorange: set a range of bytes in a file to zero.
*/
Index: miscfs/genfs/genfs_vnops.c
===================================================================
RCS file: /cvsroot/src/sys/miscfs/genfs/genfs_vnops.c,v
retrieving revision 1.153
diff -u -p -r1.153 genfs_vnops.c
--- miscfs/genfs/genfs_vnops.c 17 May 2007 07:26:22 -0000 1.153
+++ miscfs/genfs/genfs_vnops.c 21 May 2007 14:29:38 -0000
@@ -425,7 +425,7 @@ genfs_getpages(void *v)
int i, error, npages, orignpages, npgs, run, ridx, pidx, pcount;
int fs_bshift, fs_bsize, dev_bshift;
int flags = ap->a_flags;
- size_t bytes, iobytes, tailbytes, totalbytes, skipbytes;
+ size_t bytes, iobytes, tailstart, tailbytes, totalbytes, skipbytes;
vaddr_t kva;
struct buf *bp, *mbp;
struct vnode *vp = ap->a_vp;
@@ -465,9 +465,19 @@ startover:
orignpages = *ap->a_count;
GOP_SIZE(vp, origvsize, &diskeof, 0);
if (flags & PGO_PASTEOF) {
+#if defined(DIAGNOSTIC)
+ off_t writeeof;
+#endif /* defined(DIAGNOSTIC) */
+
newsize = MAX(origvsize,
origoffset + (orignpages << PAGE_SHIFT));
GOP_SIZE(vp, newsize, &memeof, GOP_SIZE_MEM);
+#if defined(DIAGNOSTIC)
+ GOP_SIZE(vp, vp->v_writesize, &writeeof, GOP_SIZE_MEM);
+ if (newsize > round_page(writeeof)) {
+ panic("%s: past eof", __func__);
+ }
+#endif /* defined(DIAGNOSTIC) */
} else {
GOP_SIZE(vp, origvsize, &memeof, GOP_SIZE_MEM);
}
@@ -727,21 +737,24 @@ startover:
BIO_SETPRIO(mbp, BPRIO_TIMECRITICAL);
/*
+ * XXX update
* if EOF is in the middle of the range, zero the part past EOF.
* if the page including EOF is not PG_FAKE, skip over it since
* in that case it has valid data that we need to preserve.
*/
- if (tailbytes > 0) {
- size_t tailstart = bytes;
-
- if ((pgs[bytes >> PAGE_SHIFT]->flags & PG_FAKE) == 0) {
- tailstart = round_page(tailstart);
- tailbytes -= tailstart - bytes;
- }
- UVMHIST_LOG(ubchist, "tailbytes %p 0x%x 0x%x",
- kva, tailstart, tailbytes,0);
- memset((void *)(kva + tailstart), 0, tailbytes);
+ tailstart = bytes;
+ while (tailbytes > 0) {
+ const int len = PAGE_SIZE - (tailstart & PAGE_MASK);
+
+ KASSERT(len <= tailbytes);
+ if ((pgs[tailstart >> PAGE_SHIFT]->flags & PG_FAKE) != 0) {
+ memset((void *)(kva + tailstart), 0, len);
+ UVMHIST_LOG(ubchist, "tailbytes %p 0x%x 0x%x",
+ kva, tailstart, len, 0);
+ }
+ tailstart += len;
+ tailbytes -= len;
}
/*
@@ -1514,7 +1527,8 @@ genfs_do_io(struct vnode *vp, off_t off,
UVMHIST_LOG(ubchist, "vp %p kva %p len 0x%x flags 0x%x",
vp, kva, len, flags);
- GOP_SIZE(vp, vp->v_size, &eof, 0);
+ KASSERT(vp->v_size <= vp->v_writesize);
+ GOP_SIZE(vp, vp->v_writesize, &eof, 0);
if (vp->v_type != VBLK) {
fs_bshift = vp->v_mount->mnt_fs_bshift;
dev_bshift = vp->v_mount->mnt_dev_bshift;
Index: miscfs/specfs/spec_vnops.c
===================================================================
RCS file: /cvsroot/src/sys/miscfs/specfs/spec_vnops.c,v
retrieving revision 1.98
diff -u -p -r1.98 spec_vnops.c
--- miscfs/specfs/spec_vnops.c 4 Mar 2007 06:03:14 -0000 1.98
+++ miscfs/specfs/spec_vnops.c 21 May 2007 14:29:38 -0000
@@ -245,7 +245,8 @@ spec_open(v)
if (error)
return error;
if (!(*d_ioctl)(vp->v_rdev, DIOCGPART, (void *)&pi, FREAD, curlwp))
- vp->v_size = (voff_t)pi.disklab->d_secsize * pi.part->p_size;
+ uvm_vnp_setsize(vp,
+ (voff_t)pi.disklab->d_secsize * pi.part->p_size);
return 0;
}
Index: ufs/ffs/ffs_vnops.c
===================================================================
RCS file: /cvsroot/src/sys/ufs/ffs/ffs_vnops.c,v
retrieving revision 1.87
diff -u -p -r1.87 ffs_vnops.c
--- ufs/ffs/ffs_vnops.c 17 May 2007 07:26:23 -0000 1.87
+++ ufs/ffs/ffs_vnops.c 21 May 2007 14:29:38 -0000
@@ -110,7 +110,7 @@ const struct vnodeopv_entry_desc ffs_vno
{ &vop_pathconf_desc, ufs_pathconf }, /* pathconf */
{ &vop_advlock_desc, ufs_advlock }, /* advlock */
{ &vop_bwrite_desc, vn_bwrite }, /* bwrite */
- { &vop_getpages_desc, ffs_getpages }, /* getpages */
+ { &vop_getpages_desc, genfs_getpages }, /* getpages */
{ &vop_putpages_desc, genfs_putpages }, /* putpages */
{ &vop_openextattr_desc, ffs_openextattr }, /* openextattr */
{ &vop_closeextattr_desc, ffs_closeextattr }, /* closeextattr */
@@ -514,6 +514,7 @@ ffs_reclaim(void *v)
return (0);
}
+#if 0
int
ffs_getpages(void *v)
{
@@ -548,6 +549,7 @@ ffs_getpages(void *v)
}
return genfs_getpages(v);
}
+#endif
/*
* Return the last logical file offset that should be written for this file
Index: ufs/ufs/ufs_readwrite.c
===================================================================
RCS file: /cvsroot/src/sys/ufs/ufs/ufs_readwrite.c,v
retrieving revision 1.78
diff -u -p -r1.78 ufs_readwrite.c
--- ufs/ufs/ufs_readwrite.c 17 May 2007 07:26:23 -0000 1.78
+++ ufs/ufs/ufs_readwrite.c 21 May 2007 14:29:38 -0000
@@ -214,9 +214,7 @@ WRITE(void *v)
off_t osize, origoff, oldoff, preallocoff, endallocoff, nsize;
int blkoffset, error, flags, ioflag, resid, size, xfersize;
int aflag;
- int ubc_alloc_flags, ubc_release_flags;
int extended=0;
- void *win;
vsize_t bytelen;
bool async;
bool usepc = false;
@@ -314,20 +312,20 @@ WRITE(void *v)
off_t eob;
eob = blkroundup(fs, osize);
+ uvm_vnp_setwritesize(vp, eob);
error = ufs_balloc_range(vp, osize, eob - osize, cred, aflag);
if (error)
goto out;
if (flags & B_SYNC) {
- vp->v_size = eob;
simple_lock(&vp->v_interlock);
VOP_PUTPAGES(vp, trunc_page(osize & fs->fs_bmask),
round_page(eob), PGO_CLEANIT | PGO_SYNCIO);
}
}
- ubc_alloc_flags = UBC_WRITE;
while (uio->uio_resid > 0) {
- bool extending; /* if we're extending a whole block */
+ int ubc_flags = UBC_WRITE;
+ bool overwrite; /* if we're overwriting a whole block */
off_t newoff;
if (ioflag & IO_DIRECT) {
@@ -348,15 +346,31 @@ WRITE(void *v)
* since the new blocks will be inaccessible until the write
* is complete.
*/
- extending = uio->uio_offset >= preallocoff &&
+ overwrite = uio->uio_offset >= preallocoff &&
uio->uio_offset < endallocoff;
+ if (!overwrite && (vp->v_flag & VMAPPED) == 0 &&
+ blkoff(fs, uio->uio_offset) == 0 &&
+ (uio->uio_offset & PAGE_MASK) == 0) {
+ vsize_t len;
+
+ len = trunc_page(bytelen);
+ len -= blkoff(fs, len);
+ if (len > 0) {
+ overwrite = true;
+ bytelen = len;
+ }
+ }
- if (!extending) {
+ newoff = oldoff + bytelen;
+ if (vp->v_size < newoff) {
+ uvm_vnp_setwritesize(vp, newoff);
+ }
+
+ if (!overwrite) {
error = ufs_balloc_range(vp, uio->uio_offset, bytelen,
cred, aflag);
if (error)
break;
- ubc_alloc_flags &= ~UBC_FAULTBUSY;
} else {
genfs_node_wrlock(vp);
error = GOP_ALLOC(vp, uio->uio_offset, bytelen,
@@ -364,26 +378,15 @@ WRITE(void *v)
genfs_node_unlock(vp);
if (error)
break;
- ubc_alloc_flags |= UBC_FAULTBUSY;
+ ubc_flags |= UBC_FAULTBUSY;
}
/*
* copy the data.
*/
- win = ubc_alloc(&vp->v_uobj, uio->uio_offset, &bytelen,
- UVM_ADV_NORMAL, ubc_alloc_flags);
- error = uiomove(win, bytelen, uio);
- if (error && extending) {
- /*
- * if we haven't initialized the pages yet,
- * do it now. it's safe to use memset here
- * because we just mapped the pages above.
- */
- memset(win, 0, bytelen);
- }
- ubc_release_flags = UBC_WANT_UNMAP(vp) ? UBC_UNMAP : 0;
- ubc_release(win, ubc_release_flags);
+ ubc_flags |= UBC_WANT_UNMAP(vp) ? UBC_UNMAP : 0;
+ error = ubc_uiomove(&vp->v_uobj, uio, bytelen, ubc_flags);
/*
* update UVM's notion of the size now that we've
@@ -393,7 +396,6 @@ WRITE(void *v)
* otherwise ffs_truncate can't flush soft update states.
*/
- newoff = oldoff + bytelen;
if (vp->v_size < newoff) {
uvm_vnp_setsize(vp, newoff);
extended = 1;
Index: nfs/nfs_bio.c
===================================================================
RCS file: /cvsroot/src/sys/nfs/nfs_bio.c,v
retrieving revision 1.154
diff -u -p -r1.154 nfs_bio.c
--- nfs/nfs_bio.c 9 May 2007 23:17:45 -0000 1.154
+++ nfs/nfs_bio.c 21 May 2007 14:29:38 -0000
@@ -462,10 +462,9 @@ nfs_write(v)
kauth_cred_t cred = ap->a_cred;
struct vattr vattr;
struct nfsmount *nmp = VFSTONFS(vp->v_mount);
- void *win;
voff_t oldoff, origoff;
vsize_t bytelen;
- int flags, error = 0;
+ int error = 0;
int ioflag = ap->a_ioflag;
int extended = 0, wrotedata = 0;
@@ -519,7 +518,7 @@ nfs_write(v)
origoff = uio->uio_offset;
do {
- bool extending; /* if we are extending whole pages */
+ bool overwrite; /* if we are overwriting whole pages */
u_quad_t oldsize;
oldoff = uio->uio_offset;
bytelen = uio->uio_resid;
@@ -531,17 +530,27 @@ nfs_write(v)
if (np->n_size < uio->uio_offset + bytelen) {
np->n_size = uio->uio_offset + bytelen;
}
- extending = ((uio->uio_offset & PAGE_MASK) == 0 &&
- (bytelen & PAGE_MASK) == 0 &&
- uio->uio_offset >= vp->v_size);
- win = ubc_alloc(&vp->v_uobj, uio->uio_offset, &bytelen,
- UVM_ADV_NORMAL,
- UBC_WRITE | (extending ? UBC_FAULTBUSY : 0));
- error = uiomove(win, bytelen, uio);
- flags = UBC_WANT_UNMAP(vp) ? UBC_UNMAP : 0;
- ubc_release(win, flags);
+ overwrite = false;
+ if ((uio->uio_offset & PAGE_MASK) == 0) {
+ if ((vp->v_flag & VMAPPED) == 0 &&
+ bytelen > PAGE_SIZE) {
+ bytelen = trunc_page(bytelen);
+ overwrite = true;
+ } else if ((bytelen & PAGE_MASK) == 0 &&
+ uio->uio_offset >= vp->v_size) {
+ overwrite = true;
+ }
+ }
+ if (vp->v_size < uio->uio_offset + bytelen) {
+ uvm_vnp_setwritesize(vp, uio->uio_offset + bytelen);
+ }
+ error = ubc_uiomove(&vp->v_uobj, uio, bytelen,
+ UBC_WRITE | UBC_PARTIALOK |
+ (overwrite ? UBC_FAULTBUSY : 0) |
+ (UBC_WANT_UNMAP(vp) ? UBC_UNMAP : 0));
if (error) {
- if (extending) {
+ uvm_vnp_setwritesize(vp, vp->v_size);
+ if (overwrite && np->n_size != oldsize) {
/*
* backout size and free pages past eof.
*/
--NextPart-20070521233348-0389500--