Subject: Re: poolifying fileassoc
To: YAMAMOTO Takashi <yamt@mwd.biglobe.ne.jp>
From: Brett Lymn <blymn@baesystems.com.au>
List: tech-kern
Date: 04/08/2007 17:57:55
--bp/iNruPH9dso1Pn
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
On Sun, Apr 08, 2007 at 01:07:23AM +0900, YAMAMOTO Takashi wrote:
>
> can you rebase it to -current?
>
Done. Builds and works ok for me.
--
Brett Lymn
--bp/iNruPH9dso1Pn
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="iodone.diff"
Index: src/sys/uvm/uvm_pager.c
===================================================================
RCS file: /cvsroot/src/sys/uvm/uvm_pager.c,v
retrieving revision 1.81
diff -u -r1.81 uvm_pager.c
--- src/sys/uvm/uvm_pager.c 22 Feb 2007 06:05:01 -0000 1.81
+++ src/sys/uvm/uvm_pager.c 8 Apr 2007 08:25:00 -0000
@@ -43,6 +43,7 @@
#include "opt_uvmhist.h"
#include "opt_readahead.h"
+#include "veriexec.h"
#include <sys/param.h>
#include <sys/systm.h>
@@ -52,6 +53,7 @@
#include <sys/vnode.h>
#include <uvm/uvm.h>
+#include <sys/verified_exec.h>
struct pool *uvm_aiobuf_pool;
@@ -279,41 +281,73 @@
}
/*
- * uvm_aio_aiodone: do iodone processing for async i/os.
- * this should be called in thread context, not interrupt context.
+ *
+ * uvm_io_done: Set page flag status when an i/o (synch or asynch) has
+ * completed or an async i/o is started. This function handles the
+ * following situations:
+ *
+ * - Synchronous i/o has completed
+ * - Asynchronous i/o has been initiated
+ * - Asynchronous i/o had completed
+ * - i/o is associated with swap
+ *
+ * All these cases what needs to be done to the page structures is very
+ * similar with some small variations depending on the calling situation.
+ *
+ * Function arguments are:
+ *
+ * pgs array of pages to handle
+ * npages number of pages in request
+ * flags type of completion, see uvm_pager.h
+ * overwrite TRUE if overwriting pages
+ * ridx The io may have been rounded by prepending pages to
+ * the pgs array, this gives the index into the pgs array
+ * of the first page prior to rounding (set to 0 if no
+ * rounding done)
+ * orignpages original transaction size before rounding. If no
+ * rounding was done then set to npages.
+ * bp pointer to the io buffer. We need the flags, error and
+ * vnode pointer from this.
+ *
+ * This function must be called with no locks held, the page queue and uvm
+ * object locks will be acquired and released as needed.
+ *
*/
-
-void
-uvm_aio_aiodone(struct buf *bp)
+int
+uvm_io_done(struct vm_page **pgs, int npages, int flags, boolean_t overwrite,
+ int ridx, int orignpages, struct buf *bp)
{
- int npages = bp->b_bufsize >> PAGE_SHIFT;
- struct vm_page *pg, *pgs[npages];
- struct uvm_object *uobj;
+ struct vm_page *pg;
+ int i, error, swslot;
struct simplelock *slock;
- int s, i, error, swslot;
- bool write, swap;
- UVMHIST_FUNC("uvm_aio_aiodone"); UVMHIST_CALLED(ubchist);
- UVMHIST_LOG(ubchist, "bp %p", bp, 0,0,0);
+ struct uvm_object *uobj;
+ boolean_t write, swap;
error = (bp->b_flags & B_ERROR) ? (bp->b_error ? bp->b_error : EIO) : 0;
write = (bp->b_flags & B_READ) == 0;
- /* XXXUBC B_NOCACHE is for swap pager, should be done differently */
- if (write && !(bp->b_flags & B_NOCACHE) && bioops.io_pageiodone) {
- (*bioops.io_pageiodone)(bp);
- }
- uobj = NULL;
for (i = 0; i < npages; i++) {
- pgs[i] = uvm_pageratop((vaddr_t)bp->b_data + (i << PAGE_SHIFT));
- UVMHIST_LOG(ubchist, "pgs[%d] = %p", i, pgs[i],0,0);
+ pg = pgs[i];
+ if (pg != NULL)
+ break;
}
- uvm_pagermapout((vaddr_t)bp->b_data, npages);
+
+ if (pg == NULL) /* no pages to do... */
+ return error;
swslot = 0;
slock = NULL;
- pg = pgs[0];
+ uobj = NULL;
+
swap = (pg->uanon != NULL && pg->uobject == NULL) ||
(pg->pqflags & PQ_AOBJ) != 0;
+
+#if NVERIEXEC > 0
+ if (!error && !write && !swap && (flags != UVMPAGER_ASYNCIO_START)) {
+ error = veriexec_block_verify(bp->b_vp, pgs, npages);
+ }
+#endif
+
if (!swap) {
uobj = pg->uobject;
slock = &uobj->vmobjlock;
@@ -335,9 +369,11 @@
panic("%s: swap", __func__);
#endif /* defined(VMSWAP) */
}
+
for (i = 0; i < npages; i++) {
+ if (pgs[i] == NULL)
+ continue;
pg = pgs[i];
- KASSERT(swap || pg->uobject == uobj);
UVMHIST_LOG(ubchist, "pg %p", pg, 0,0,0);
#if defined(VMSWAP)
@@ -347,11 +383,10 @@
*/
if (swap) {
- if (pg->uobject != NULL) {
+ if (pg->uobject != NULL)
slock = &pg->uobject->vmobjlock;
- } else {
+ else
slock = &pg->uanon->an_lock;
- }
simple_lock(slock);
uvm_lock_pageq();
}
@@ -369,7 +404,9 @@
if (error) {
int slot;
if (!write) {
- pg->flags |= PG_RELEASED;
+ if ((flags == UVMPAGER_ASYNCIO_DONE) ||
+ (pg->flags & PG_FAKE))
+ pg->flags |= PG_RELEASED;
continue;
} else if (error == ENOMEM) {
if (pg->flags & PG_PAGEOUT) {
@@ -405,16 +442,46 @@
* still be set from the page's previous identity.
*/
- if (pg->flags & PG_FAKE) {
- KASSERT(!write);
- pg->flags &= ~PG_FAKE;
+ if (pg->flags & PG_FAKE && !overwrite) {
+ pg->flags &= ~(PG_FAKE);
+ pmap_clear_modify(pgs[i]);
+ }
+
+ if (i < ridx || i >= ridx + orignpages ||
+ (flags == UVMPAGER_ASYNCIO_START)) {
+ UVMHIST_LOG(ubchist, "unbusy pg %p offset 0x%x",
+ pg, pg->offset,0,0);
+ if (pg->flags & PG_FAKE) {
+ KASSERT(!write);
+ KASSERT(overwrite);
+ KASSERT((pg->flags & PG_CLEAN) != 0);
+ if (flags != UVMPAGER_ASYNCIO_DONE)
+ uvm_pagezero(pg);
+ else {
+ pmap_clear_modify(pg);
#if defined(READAHEAD_STATS)
- pg->pqflags |= PQ_READAHEAD;
- uvm_ra_total.ev_count++;
+ pg->pqflags |= PQ_READAHEAD;
+ uvm_ra_total.ev_count++;
#endif /* defined(READAHEAD_STATS) */
- KASSERT((pg->flags & PG_CLEAN) != 0);
+ }
+ }
+
+ if (flags != UVMPAGER_ASYNCIO_DONE) {
+ pg->flags &= ~PG_BUSY;
+ }
+
+ if (pg->flags & PG_WANTED) {
+ wakeup(pg);
+ }
+
+ if ((pg->flags & PG_RELEASED) &&
+ (flags != UVMPAGER_ASYNCIO_DONE)) {
+ uvm_pagefree(pg);
+ continue;
+ }
uvm_pageenqueue(pg);
- pmap_clear_modify(pg);
+ pg->flags &= ~(PG_WANTED|PG_FAKE);
+ UVM_PAGE_OWN(pg, NULL);
}
/*
@@ -435,6 +502,17 @@
*/
if (swap) {
+ if (i < ridx || i >= ridx + orignpages
+ || (flags == UVMPAGER_ASYNCIO_START)) {
+ if ((pg->flags & PG_WANTED) &&
+ ((flags != UVMPAGER_ASYNCIO_DONE))) {
+ wakeup(pg);
+ }
+ }
+ if ((flags != UVMPAGER_ASYNCIO_DONE))
+ pg->flags &= ~(PG_WANTED|PG_BUSY);
+ pg->flags &= ~PG_FAKE;
+
if (pg->uobject == NULL && pg->uanon->an_ref == 0 &&
(pg->flags & PG_RELEASED) != 0) {
uvm_unlock_pageq();
@@ -448,7 +526,8 @@
#endif /* defined(VMSWAP) */
}
if (!swap) {
- uvm_page_unbusy(pgs, npages);
+ if ((flags == UVMPAGER_ASYNCIO_DONE) || error)
+ uvm_page_unbusy(pgs, npages);
uvm_unlock_pageq();
simple_unlock(slock);
} else {
@@ -470,6 +549,40 @@
uvmexp.pdpending--;
#endif /* defined(VMSWAP) */
}
+
+ return error;
+}
+
+/*
+ * uvm_aio_aiodone: do iodone processing for async i/os.
+ * this should be called in thread context, not interrupt context.
+ */
+
+void
+uvm_aio_aiodone(struct buf *bp)
+{
+ int npages = bp->b_bufsize >> PAGE_SHIFT;
+ struct vm_page *pgs[npages];
+ int s, i, error;
+ boolean_t write;
+
+ UVMHIST_FUNC("uvm_aio_aiodone"); UVMHIST_CALLED(ubchist);
+ UVMHIST_LOG(ubchist, "bp %p", bp, 0,0,0);
+
+ write = (bp->b_flags & B_READ) == 0;
+ /* XXXUBC B_NOCACHE is for swap pager, should be done differently */
+ if (write && !(bp->b_flags & B_NOCACHE) && bioops.io_pageiodone) {
+ (*bioops.io_pageiodone)(bp);
+ }
+
+ for (i = 0; i < npages; i++) {
+ pgs[i] = uvm_pageratop((vaddr_t)bp->b_data + (i << PAGE_SHIFT));
+ UVMHIST_LOG(ubchist, "pgs[%d] = %p", i, pgs[i],0,0);
+ }
+ uvm_pagermapout((vaddr_t)bp->b_data, npages);
+
+ error = uvm_io_done(pgs, npages, UVMPAGER_ASYNCIO_DONE, FALSE,
+ 0, npages, bp);
s = splbio();
if (write && (bp->b_flags & B_AGE) != 0) {
vwakeup(bp);
Index: src/sys/uvm/uvm_pager.h
===================================================================
RCS file: /cvsroot/src/sys/uvm/uvm_pager.h,v
retrieving revision 1.34
diff -u -r1.34 uvm_pager.h
--- src/sys/uvm/uvm_pager.h 22 Feb 2006 22:28:18 -0000 1.34
+++ src/sys/uvm/uvm_pager.h 8 Apr 2007 08:25:00 -0000
@@ -176,12 +176,19 @@
struct vm_page *uvm_pageratop(vaddr_t);
vaddr_t uvm_pagermapin(struct vm_page **, int, int);
void uvm_pagermapout(vaddr_t, int);
+int uvm_io_done(struct vm_page **, int, int, boolean_t, int, int,
+ struct buf *);
/* Flags to uvm_pagermapin() */
#define UVMPAGER_MAPIN_WAITOK 0x01 /* it's okay to wait */
#define UVMPAGER_MAPIN_READ 0x02 /* device -> host */
#define UVMPAGER_MAPIN_WRITE 0x00 /* host -> device (pseudo flag) */
+/* Flags to uvm_io_done */
+#define UVMPAGER_SYNCIO_DONE 0x01 /* Synch i/o has completed */
+#define UVMPAGER_ASYNCIO_START 0x02 /* Asynch i/o started */
+#define UVMPAGER_ASYNCIO_DONE 0x03 /* Asynch i/o has completed */
+
/*
* XXX
* this is needed until the device strategy interface
Index: src/sys/miscfs/genfs/genfs_vnops.c
===================================================================
RCS file: /cvsroot/src/sys/miscfs/genfs/genfs_vnops.c,v
retrieving revision 1.150
diff -u -r1.150 genfs_vnops.c
--- src/sys/miscfs/genfs/genfs_vnops.c 4 Mar 2007 06:03:14 -0000 1.150
+++ src/sys/miscfs/genfs/genfs_vnops.c 8 Apr 2007 08:25:00 -0000
@@ -455,10 +455,11 @@
panic("genfs_getpages: too many pages");
}
+ mbp = NULL;
pgs = pgs_onstack;
pgs_size = sizeof(pgs_onstack);
-startover:
+ startover:
error = 0;
origvsize = vp->v_size;
origoffset = ap->a_offset;
@@ -650,6 +651,7 @@
rw_exit(&gp->g_glock);
UVMHIST_LOG(ubchist, "returning cached pages", 0,0,0,0);
npages += ridx;
+ simple_unlock(&uobj->vmobjlock);
goto out;
}
@@ -667,6 +669,7 @@
pg1->flags &= ~(PG_RDONLY|PG_CLEAN);
}
npages += ridx;
+ simple_unlock(&uobj->vmobjlock);
goto out;
}
@@ -874,7 +877,6 @@
if (bp != NULL) {
error = biowait(mbp);
}
- putiobuf(mbp);
uvm_pagermapout(kva, npages);
/*
@@ -902,74 +904,32 @@
}
}
rw_exit(&gp->g_glock);
- simple_lock(&uobj->vmobjlock);
-
- /*
- * we're almost done! release the pages...
- * for errors, we free the pages.
- * otherwise we activate them and mark them as valid and clean.
- * also, unbusy pages that were not actually requested.
- */
-
- if (error) {
- for (i = 0; i < npages; i++) {
- if (pgs[i] == NULL) {
- continue;
- }
- UVMHIST_LOG(ubchist, "examining pg %p flags 0x%x",
- pgs[i], pgs[i]->flags, 0,0);
- if (pgs[i]->flags & PG_FAKE) {
- pgs[i]->flags |= PG_RELEASED;
- }
- }
- uvm_lock_pageq();
- uvm_page_unbusy(pgs, npages);
- uvm_unlock_pageq();
- simple_unlock(&uobj->vmobjlock);
- UVMHIST_LOG(ubchist, "returning error %d", error,0,0,0);
- goto out_err;
- }
+/* simple_lock(&uobj->vmobjlock);*/
out:
- UVMHIST_LOG(ubchist, "succeeding, npages %d", npages,0,0,0);
- error = 0;
- uvm_lock_pageq();
- for (i = 0; i < npages; i++) {
- pg = pgs[i];
- if (pg == NULL) {
- continue;
- }
- UVMHIST_LOG(ubchist, "examining pg %p flags 0x%x",
- pg, pg->flags, 0,0);
- if (pg->flags & PG_FAKE && !overwrite) {
- pg->flags &= ~(PG_FAKE);
- pmap_clear_modify(pgs[i]);
- }
- KASSERT(!write || !blockalloc || (pg->flags & PG_RDONLY) == 0);
- if (i < ridx || i >= ridx + orignpages || async) {
- UVMHIST_LOG(ubchist, "unbusy pg %p offset 0x%x",
- pg, pg->offset,0,0);
- if (pg->flags & PG_WANTED) {
- wakeup(pg);
- }
- if (pg->flags & PG_FAKE) {
- KASSERT(overwrite);
- uvm_pagezero(pg);
- }
- if (pg->flags & PG_RELEASED) {
- uvm_pagefree(pg);
- continue;
- }
- uvm_pageenqueue(pg);
- pg->flags &= ~(PG_WANTED|PG_BUSY|PG_FAKE);
- UVM_PAGE_OWN(pg, NULL);
+ if (mbp == NULL) {
+ /*
+ * fake up a io buf to use with uvm_io_done iff the pages
+ * were already resident, just fill in the bits used.
+ */
+ mbp = getiobuf();
+ mbp->b_flags = B_READ| (async ? B_ASYNC : 0);
+ mbp->b_error = 0;
+ if (error) {
+ mbp->b_flags |= B_ERROR;
+ mbp->b_error = error;
}
+ mbp->b_vp = vp;
}
- uvm_unlock_pageq();
- simple_unlock(&uobj->vmobjlock);
- if (ap->a_m != NULL) {
+
+ error = uvm_io_done(pgs, npages,
+ ((async)? UVMPAGER_ASYNCIO_START : UVMPAGER_SYNCIO_DONE),
+ overwrite, ridx, orignpages, mbp);
+
+ putiobuf(mbp);
+ if ((!error) && (ap->a_m != NULL)) {
memcpy(ap->a_m, &pgs[ridx],
- orignpages * sizeof(struct vm_page *));
+ orignpages * sizeof(struct vm_page *));
}
out_err:
--bp/iNruPH9dso1Pn--