Subject: Re: Is O_DIRECT useless on NetBSD?
To: Roland Illig <rillig@NetBSD.org>
From: Antti Kantee <pooka@cs.hut.fi>
List: tech-kern
Date: 11/23/2007 17:51:14
--9zSXsLTf0vkW971A
Content-Type: text/plain; charset=iso-8859-1
Content-Disposition: inline
Content-Transfer-Encoding: 8bit
On Thu Nov 22 2007 at 23:53:34 +0100, Roland Illig wrote:
> Pavel Cahyna wrote:
> >>On Wed Nov 21 2007 at 16:08:59 +0100, Roland Illig wrote:
> >>
> >>>So am I right that currently there is no way for a program to say "I
> >>>will write some data to that file, and nobody is going to use it in the
> >>>near future, so please don't buffer it"?
> >
> >Or posix_fadvise?
>
> Nice idea, but ...
>
> BUGS
> POSIX_FADV_WILLNEED, POSIX_FADV_DONTNEED, and POSIX_FADV_NOREUSE
> are not implemented.
This quick patch/hack adds support for POSIX_FADV_NOREUSE. But I/O will
be quite slow if you use small I/O sizes, because the pages of a UBC
window are cleaned and freed as soon as the window is released.
--
Antti Kantee <pooka@iki.fi> Of course he runs NetBSD
http://www.iki.fi/pooka/ http://www.NetBSD.org/
"la qualité la plus indispensable du cuisinier est l'exactitude"
--9zSXsLTf0vkW971A
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="noreuse.diff"
Index: sys/vnode.h
===================================================================
RCS file: /cvsroot/src/sys/sys/vnode.h,v
retrieving revision 1.174
diff -p -u -r1.174 vnode.h
--- sys/vnode.h 23 Nov 2007 11:53:45 -0000 1.174
+++ sys/vnode.h 23 Nov 2007 15:49:54 -0000
@@ -254,7 +254,7 @@ extern struct simplelock global_v_numout
#define IO_NORMAL 0x00800 /* operate on regular data */
#define IO_EXT 0x01000 /* operate on extended attributes */
#define IO_DIRECT 0x02000 /* direct I/O hint */
-#define IO_ADV_MASK 0x00003 /* access pattern hint */
+#define IO_ADV_MASK 0x00007 /* access pattern hint */
#define IO_ADV_SHIFT 0
#define IO_ADV_ENCODE(adv) (((adv) << IO_ADV_SHIFT) & IO_ADV_MASK)
Index: kern/kern_descrip.c
===================================================================
RCS file: /cvsroot/src/sys/kern/kern_descrip.c,v
retrieving revision 1.162
diff -p -u -r1.162 kern_descrip.c
--- kern/kern_descrip.c 7 Nov 2007 00:23:20 -0000 1.162
+++ kern/kern_descrip.c 23 Nov 2007 15:49:55 -0000
@@ -1671,9 +1671,11 @@ sys_posix_fadvise(struct lwp *l, void *v
case POSIX_FADV_NORMAL:
case POSIX_FADV_RANDOM:
case POSIX_FADV_SEQUENTIAL:
+ case POSIX_FADV_NOREUSE:
KASSERT(POSIX_FADV_NORMAL == UVM_ADV_NORMAL);
KASSERT(POSIX_FADV_RANDOM == UVM_ADV_RANDOM);
KASSERT(POSIX_FADV_SEQUENTIAL == UVM_ADV_SEQUENTIAL);
+ KASSERT(POSIX_FADV_NOREUSE == UVM_ADV_NOREUSE);
/*
* we ignore offset and size.
@@ -1684,7 +1686,6 @@ sys_posix_fadvise(struct lwp *l, void *v
case POSIX_FADV_WILLNEED:
case POSIX_FADV_DONTNEED:
- case POSIX_FADV_NOREUSE:
/*
* not implemented yet.
Index: kern/vfs_vnops.c
===================================================================
RCS file: /cvsroot/src/sys/kern/vfs_vnops.c,v
retrieving revision 1.143
diff -p -u -r1.143 vfs_vnops.c
--- kern/vfs_vnops.c 10 Oct 2007 20:42:27 -0000 1.143
+++ kern/vfs_vnops.c 23 Nov 2007 15:49:55 -0000
@@ -444,10 +444,11 @@ vn_write(struct file *fp, off_t *offset,
int flags)
{
struct vnode *vp = (struct vnode *)fp->f_data;
- int count, error, ioflag = IO_UNIT;
+ int count, error, ioflag;
struct lwp *l = curlwp;
mutex_enter(&fp->f_lock);
+ ioflag = IO_ADV_ENCODE(fp->f_advice) | IO_UNIT;
if (vp->v_type == VREG && (fp->f_flag & O_APPEND))
ioflag |= IO_APPEND;
if (fp->f_flag & FNONBLOCK)
Index: uvm/uvm_bio.c
===================================================================
RCS file: /cvsroot/src/sys/uvm/uvm_bio.c,v
retrieving revision 1.62
diff -p -u -r1.62 uvm_bio.c
--- uvm/uvm_bio.c 27 Jul 2007 09:50:37 -0000 1.62
+++ uvm/uvm_bio.c 23 Nov 2007 15:49:55 -0000
@@ -575,12 +575,14 @@ ubc_release(void *va, int flags)
struct uvm_object *uobj;
vaddr_t umapva;
bool unmapped;
+ int advice;
UVMHIST_FUNC("ubc_release"); UVMHIST_CALLED(ubchist);
UVMHIST_LOG(ubchist, "va %p", va, 0, 0, 0);
umap = &ubc_object.umap[((char *)va - ubc_object.kva) >> ubc_winshift];
umapva = UBC_UMAP_ADDR(umap);
uobj = umap->uobj;
+ advice = umap->advice;
KASSERT(uobj != NULL);
if (umap->flags & UMAP_PAGES_LOCKED) {
@@ -625,7 +627,19 @@ ubc_release(void *va, int flags)
umap->writelen = 0;
umap->refcount--;
if (umap->refcount == 0) {
- if (flags & UBC_UNMAP) {
+ if ((flags & UBC_UNMAP) || (advice == UVM_ADV_NOREUSE)) {
+
+ /*
+ * Clean & free all the pages in the window if the
+ * advice says they will not be needed any longer.
+ */
+ if (advice == UVM_ADV_NOREUSE) {
+ simple_lock(&uobj->vmobjlock);
+ (void) uobj->pgops->pgo_put(uobj,
+ umap->offset & ~(ubc_winsize-1),
+ (umap->offset+ubc_winsize)&~(ubc_winsize-1),
+ PGO_CLEANIT | PGO_FREE);
+ }
/*
* Invalidate any cached mappings if requested.
Index: uvm/uvm_extern.h
===================================================================
RCS file: /cvsroot/src/sys/uvm/uvm_extern.h,v
retrieving revision 1.136
diff -p -u -r1.136 uvm_extern.h
--- uvm/uvm_extern.h 6 Nov 2007 00:42:46 -0000 1.136
+++ uvm/uvm_extern.h 23 Nov 2007 15:49:56 -0000
@@ -132,7 +132,9 @@ typedef voff_t pgoff_t; /* XXX: number
#define UVM_ADV_NORMAL 0x0 /* 'normal' */
#define UVM_ADV_RANDOM 0x1 /* 'random' */
#define UVM_ADV_SEQUENTIAL 0x2 /* 'sequential' */
-/* 0x3: will need, 0x4: dontneed */
+#define UVM_ADV_WILLNEED 0x3 /* pages will be needed */
+#define UVM_ADV_DONTNEED 0x4 /* pages won't be needed */
+#define UVM_ADV_NOREUSE 0x5 /* pages will be used only once */
#define UVM_ADV_MASK 0x7 /* mask */
/* bits 0xffff0000: mapping flags */
Index: uvm/uvm_readahead.c
===================================================================
RCS file: /cvsroot/src/sys/uvm/uvm_readahead.c,v
retrieving revision 1.4
diff -p -u -r1.4 uvm_readahead.c
--- uvm/uvm_readahead.c 11 May 2007 12:11:09 -0000 1.4
+++ uvm/uvm_readahead.c 23 Nov 2007 15:49:56 -0000
@@ -195,7 +195,8 @@ uvm_ra_request(struct uvm_ractx *ra, int
off_t reqoff, size_t reqsize)
{
- if (ra == NULL || advice == UVM_ADV_RANDOM) {
+ if (ra == NULL
+ || advice == UVM_ADV_RANDOM || advice == UVM_ADV_NOREUSE) {
return;
}
Index: ufs/ufs/ufs_readwrite.c
===================================================================
RCS file: /cvsroot/src/sys/ufs/ufs/ufs_readwrite.c,v
retrieving revision 1.84
diff -p -u -r1.84 ufs_readwrite.c
--- ufs/ufs/ufs_readwrite.c 10 Oct 2007 20:42:40 -0000 1.84
+++ ufs/ufs/ufs_readwrite.c 23 Nov 2007 15:49:56 -0000
@@ -380,8 +380,8 @@ WRITE(void *v)
*/
ubc_flags |= UBC_WANT_UNMAP(vp) ? UBC_UNMAP : 0;
- error = ubc_uiomove(&vp->v_uobj, uio, bytelen, UVM_ADV_RANDOM,
- ubc_flags);
+ error = ubc_uiomove(&vp->v_uobj, uio, bytelen,
+ IO_ADV_DECODE(ioflag), ubc_flags);
/*
* update UVM's notion of the size now that we've
--9zSXsLTf0vkW971A--