Subject: Re: ufs-ism in lookup(9)
To: None <wrstuden@netbsd.org>
From: YAMAMOTO Takashi <yamt@mwd.biglobe.ne.jp>
List: tech-kern
Date: 04/03/2004 23:34:38
--NextPart-20040403232804-0245300
Content-Type: Text/Plain; charset=us-ascii
hi,
> > i'd say the comments are wrong, however, ok, i have no particular problem
> > with your suggested way. (ie. change the MAKEENTRY's behaviour)
>
> Thank you. Also put something in there somewhere about what we should do
> about negaitve caching & DELETE or RENAME for the remote fs case. I'd
> suggest turning the cnp->cn_nameiop != CREATE into cnp->cn_nameiop ==
> LOOKUP, but the point's still up for discussion if you want something
> else.
after some more thoughts, i think it's better to leave the decision to
each filesystems as they should know their own characteristics better.
how about the attached diffs? (ufs and nfs only.)
YAMAMOTO Takashi
--NextPart-20040403232804-0245300
Content-Type: Text/Plain; charset=us-ascii
Content-Disposition: attachment; filename="lookup4.diff"
Index: kern/vfs_lookup.c
===================================================================
--- kern/vfs_lookup.c (revision 462)
+++ kern/vfs_lookup.c (working copy)
@@ -323,7 +323,6 @@ lookup(ndp)
struct vnode *dp = 0; /* the directory we are searching */
struct vnode *tdp; /* saved dp */
struct mount *mp; /* mount table entry */
- int docache; /* == 0 do not cache last component */
int wantparent; /* 1 => wantparent or lockparent flag */
int rdonly; /* lookup read-only flag bit */
int error = 0;
@@ -335,10 +334,6 @@ lookup(ndp)
* Setup: break out flag bits into variables.
*/
wantparent = cnp->cn_flags & (LOCKPARENT | WANTPARENT);
- docache = (cnp->cn_flags & NOCACHE) ^ NOCACHE;
- if (cnp->cn_nameiop == DELETE ||
- (wantparent && cnp->cn_nameiop != CREATE))
- docache = 0;
rdonly = cnp->cn_flags & RDONLY;
ndp->ni_dvp = NULL;
cnp->cn_flags &= ~ISSYMLINK;
@@ -428,13 +423,8 @@ dirloop:
* a directory. Cache all intervening lookups, but not the final one.
*/
if (*cp == '\0') {
- if (docache)
- cnp->cn_flags |= MAKEENTRY;
- else
- cnp->cn_flags &= ~MAKEENTRY;
cnp->cn_flags |= ISLASTCN;
} else {
- cnp->cn_flags |= MAKEENTRY;
cnp->cn_flags &= ~ISLASTCN;
}
if (cnp->cn_namelen == 2 &&
Index: kern/vfs_cache.c
===================================================================
--- kern/vfs_cache.c (revision 637)
+++ kern/vfs_cache.c (working copy)
@@ -179,7 +179,6 @@ cache_lookup(struct vnode *dvp, struct v
int error;
if (!doingcache) {
- cnp->cn_flags &= ~MAKEENTRY;
*vpp = NULL;
return (-1);
}
@@ -187,7 +186,6 @@ cache_lookup(struct vnode *dvp, struct v
if (cnp->cn_namelen > NCHNAMLEN) {
/* XXXSMP - updating stats without lock; do we care? */
nchstats.ncs_long++;
- cnp->cn_flags &= ~MAKEENTRY;
goto fail;
}
simple_lock(&namecache_slock);
@@ -196,31 +194,22 @@ cache_lookup(struct vnode *dvp, struct v
nchstats.ncs_miss++;
goto fail_wlock;
}
- if ((cnp->cn_flags & MAKEENTRY) == 0) {
- nchstats.ncs_badhits++;
- goto remove;
- } else if (ncp->nc_vp == NULL) {
+ if (ncp->nc_vp == NULL) {
/*
* Restore the ISWHITEOUT flag saved earlier.
*/
cnp->cn_flags |= ncp->nc_flags;
- if (cnp->cn_nameiop != CREATE ||
- (cnp->cn_flags & ISLASTCN) == 0) {
- nchstats.ncs_neghits++;
- /*
- * Move this slot to end of LRU chain,
- * if not already there.
- */
- if (TAILQ_NEXT(ncp, nc_lru) != 0) {
- TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
- TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
- }
- simple_unlock(&namecache_slock);
- return (ENOENT);
- } else {
- nchstats.ncs_badhits++;
- goto remove;
+ nchstats.ncs_neghits++;
+ /*
+ * Move this slot to end of LRU chain,
+ * if not already there.
+ */
+ if (TAILQ_NEXT(ncp, nc_lru) != 0) {
+ TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
+ TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
}
+ simple_unlock(&namecache_slock);
+ return (ENOENT);
}
vp = ncp->nc_vp;
@@ -313,15 +302,6 @@ cache_lookup(struct vnode *dvp, struct v
*vpp = vp;
return (0);
-remove:
- /*
- * Last component and we are renaming or deleting,
- * the cache entry is invalid, or otherwise don't
- * want cache entry to exist.
- */
- cache_remove(ncp);
- cache_free(ncp);
-
fail_wlock:
simple_unlock(&namecache_slock);
fail:
@@ -407,12 +387,10 @@ cache_enter(struct vnode *dvp, struct vn
struct nchashhead *ncpp;
struct ncvhashhead *nvcpp;
-#ifdef DIAGNOSTIC
- if (cnp->cn_namelen > NCHNAMLEN)
- panic("cache_enter: name too long");
-#endif
if (!doingcache)
return;
+ if (cnp->cn_namelen > NCHNAMLEN)
+ return;
/*
* Free the cache slot at head of lru chain.
*/
Index: ufs/ufs/ufs_lookup.c
===================================================================
--- ufs/ufs/ufs_lookup.c (revision 599)
+++ ufs/ufs/ufs_lookup.c (working copy)
@@ -133,6 +133,7 @@ ufs_lookup(v)
const int needswap = UFS_MPNEEDSWAP(ap->a_dvp->v_mount);
int dirblksiz = DIRBLKSIZ;
ino_t foundino;
+ boolean_t toremove, tocreate;
if (UFS_MPISAPPLEUFS(ap->a_dvp->v_mount)) {
dirblksiz = APPLEUFS_DIRBLKSIZ;
}
@@ -155,8 +156,11 @@ ufs_lookup(v)
if ((error = VOP_ACCESS(vdp, VEXEC, cred, cnp->cn_proc)) != 0)
return (error);
- if ((flags & ISLASTCN) && (vdp->v_mount->mnt_flag & MNT_RDONLY) &&
- (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
+ toremove = (flags & ISLASTCN) &&
+ (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME);
+ tocreate = (flags & ISLASTCN) &&
+ (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME);
+ if (toremove && (vdp->v_mount->mnt_flag & MNT_RDONLY))
return (EROFS);
/*
@@ -166,8 +170,45 @@ ufs_lookup(v)
* check the name cache to see if the directory/name pair
* we are looking for is known already.
*/
- if ((error = cache_lookup(vdp, vpp, cnp)) >= 0)
- return (error);
+ /*
+ * XXX
+ * this chunk of the code is common between some filesystems
+ * which rely on side effects of lookup.
+ * probably it's better to make this into a subroutine.
+ */
+ error = cache_lookup(vdp, vpp, cnp);
+ if (error >= 0) {
+
+ /*
+ * we can't always use cached results because
+ * the subsequent dirop VOP relies on the side effects of
+ * non-cached lookup.
+ */
+
+ if (!(error == ENOENT && tocreate) && !(error == 0 && toremove))
+ return (error);
+
+ /*
+ * we need side effects of lookup. undo locking.
+ */
+
+ if (vdp == *vpp)
+ vrele(*vpp);
+ else if (*vpp != NULL)
+ vput(*vpp);
+ if (cnp->cn_flags & PDIRUNLOCK) {
+ vn_lock(vdp, LK_EXCLUSIVE | LK_RETRY);
+ cnp->cn_flags &= ~PDIRUNLOCK;
+ }
+ *vpp = NULL;
+
+ /*
+ * purge cache.
+ * XXX it should belong to each dirops?
+ */
+
+ cache_purge1(vdp, cnp, 0);
+ }
/*
* Suppress search for slots unless creating
@@ -177,8 +218,7 @@ ufs_lookup(v)
*/
slotstatus = FOUND;
slotfreespace = slotsize = slotneeded = 0;
- if ((nameiop == CREATE || nameiop == RENAME) &&
- (flags & ISLASTCN)) {
+ if (tocreate) {
slotstatus = NONE;
slotneeded = (sizeof(struct direct) - MAXNAMLEN +
cnp->cn_namelen + 3) &~ 3;
@@ -371,11 +411,10 @@ notfound:
* directory has not been removed, then can consider
* allowing file to be created.
*/
- if ((nameiop == CREATE || nameiop == RENAME ||
- (nameiop == DELETE &&
+ if ((tocreate || (nameiop == DELETE &&
(ap->a_cnp->cn_flags & DOWHITEOUT) &&
- (ap->a_cnp->cn_flags & ISWHITEOUT))) &&
- (flags & ISLASTCN) && dp->i_ffs_effnlink != 0) {
+ (ap->a_cnp->cn_flags & ISWHITEOUT) &&
+ (flags & ISLASTCN))) && dp->i_ffs_effnlink != 0) {
/*
* Access for write is interpreted as allowing
* creation of files in the directory.
@@ -435,8 +474,9 @@ notfound:
/*
* Insert name into cache (as non-existent) if appropriate.
*/
- if ((cnp->cn_flags & MAKEENTRY) && nameiop != CREATE)
- cache_enter(vdp, *vpp, cnp);
+ if (!tocreate)
+ cache_enter(vdp, NULL, cnp);
+ KASSERT(*vpp == NULL);
return (ENOENT);
found:
@@ -605,8 +645,9 @@ found:
/*
* Insert name into cache if appropriate.
*/
- if (cnp->cn_flags & MAKEENTRY)
+ if (!toremove)
cache_enter(vdp, *vpp, cnp);
+ KASSERT(*vpp != NULL);
return (0);
}
Index: nfs/nfs_vnops.c
===================================================================
--- nfs/nfs_vnops.c (revision 645)
+++ nfs/nfs_vnops.c (working copy)
@@ -830,6 +830,7 @@ nfs_lookup(v)
nfsfh_t *fhp;
struct nfsnode *np;
int lockparent, wantparent, error = 0, attrflag, fhsize;
+ boolean_t toremove, tocreate;
const int v3 = NFS_ISV3(dvp);
cnp->cn_flags &= ~PDIRUNLOCK;
@@ -837,8 +838,11 @@ nfs_lookup(v)
*vpp = NULLVP;
newvp = NULLVP;
- if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
- (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
+ toremove = (flags & ISLASTCN) &&
+ (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME);
+ tocreate = (flags & ISLASTCN) &&
+ (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME);
+ if (toremove && (dvp->v_mount->mnt_flag & MNT_RDONLY))
return (EROFS);
if (dvp->v_type != VDIR)
return (ENOTDIR);
@@ -866,6 +870,7 @@ nfs_lookup(v)
}
if (cnp->cn_flags & PDIRUNLOCK) {
+ /* XXX locking order */
err2 = vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
if (err2 != 0) {
*vpp = NULLVP;
@@ -887,10 +892,23 @@ nfs_lookup(v)
}
if (error == ENOENT) {
+ /*
+ * if the directory is changed by others,
+ * we can't trust negative caches.
+ */
if (!VOP_GETATTR(dvp, &vattr, cnp->cn_cred,
cnp->cn_proc) && timespeccmp(&vattr.va_mtime,
- &VTONFS(dvp)->n_nctime, ==))
- return ENOENT;
+ &VTONFS(dvp)->n_nctime, ==)) {
+ if (tocreate) {
+ if (dvp->v_mount->mnt_flag & MNT_RDONLY)
+ error = EROFS;
+ else
+ error = EJUSTRETURN;
+ cnp->cn_flags |= SAVENAME;
+ cache_purge1(dvp, cnp, 0);
+ }
+ return error;
+ }
cache_purge1(dvp, NULL, PURGE_CHILDREN);
timespecclear(&np->n_nctime);
goto dorpc;
@@ -901,7 +919,7 @@ nfs_lookup(v)
&& vattr.va_ctime.tv_sec == VTONFS(newvp)->n_ctime)
{
nfsstats.lookupcache_hits++;
- if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
+ if (toremove || tocreate)
cnp->cn_flags |= SAVENAME;
if ((!lockparent || !(flags & ISLASTCN)) &&
newvp != dvp)
@@ -1030,12 +1048,12 @@ dorpc:
cnp->cn_flags |= PDIRUNLOCK;
}
}
+ KASSERT(error == 0);
+ KASSERT(newvp != NULL);
if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
cnp->cn_flags |= SAVENAME;
- if ((cnp->cn_flags & MAKEENTRY) &&
- (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN))) {
+ if (!toremove)
nfs_cache_enter(dvp, newvp, cnp);
- }
*vpp = newvp;
nfsm_reqdone;
if (error) {
@@ -1045,8 +1063,7 @@ dorpc:
* (the nfsm_* macros will jump to nfsm_reqdone
* on error).
*/
- if (error == ENOENT && (cnp->cn_flags & MAKEENTRY) &&
- cnp->cn_nameiop != CREATE) {
+ if (error == ENOENT && !tocreate) {
nfs_cache_enter(dvp, NULL, cnp);
}
if (newvp != NULLVP) {
@@ -1054,8 +1071,7 @@ dorpc:
if (newvp != dvp)
VOP_UNLOCK(newvp, 0);
}
- if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) &&
- (flags & ISLASTCN) && error == ENOENT) {
+ if (tocreate && error == ENOENT) {
if (dvp->v_mount->mnt_flag & MNT_RDONLY)
error = EROFS;
else
@@ -1072,6 +1088,7 @@ validate:
* make sure we have valid type and size.
*/
+ KASSERT(error == 0);
newvp = *vpp;
if (newvp->v_type == VNON) {
struct vattr vattr; /* dummy */
@@ -1514,6 +1531,8 @@ nfs_mknodrpc(dvp, vpp, cnp, vap)
u_int32_t rdev;
const int v3 = NFS_ISV3(dvp);
+ KASSERT(cnp->cn_nameiop == CREATE);
+
if (vap->va_type == VCHR || vap->va_type == VBLK)
rdev = txdr_unsigned(vap->va_rdev);
else if (vap->va_type == VFIFO || vap->va_type == VSOCK)
@@ -1564,8 +1583,7 @@ nfs_mknodrpc(dvp, vpp, cnp, vap)
if (newvp)
vput(newvp);
} else {
- if (cnp->cn_flags & MAKEENTRY)
- nfs_cache_enter(dvp, newvp, cnp);
+ nfs_cache_enter(dvp, newvp, cnp);
*vpp = newvp;
}
PNBUF_PUT(cnp->cn_pnbuf);
@@ -1626,6 +1644,9 @@ nfs_create(v)
struct mbuf *mreq, *mrep, *md, *mb;
const int v3 = NFS_ISV3(dvp);
+ KASSERT(cnp->cn_nameiop == CREATE);
+ KASSERT(cnp->cn_flags & HASBUF);
+
/*
* Oops, not for me..
*/
@@ -1694,8 +1715,7 @@ again:
} else if (v3 && (fmode & O_EXCL))
error = nfs_setattrrpc(newvp, vap, cnp->cn_cred, cnp->cn_proc);
if (!error) {
- if (cnp->cn_flags & MAKEENTRY)
- nfs_cache_enter(dvp, newvp, cnp);
+ nfs_cache_enter(dvp, newvp, cnp);
*ap->a_vpp = newvp;
}
PNBUF_PUT(cnp->cn_pnbuf);
@@ -1777,6 +1797,8 @@ nfs_remove(v)
} else if (!np->n_sillyrename)
error = nfs_sillyrename(dvp, vp, cnp);
PNBUF_PUT(cnp->cn_pnbuf);
+ if (!error)
+ cache_purge1(dvp, cnp, 0);
if (!error && nfs_getattrcache(vp, &vattr) == 0 &&
vattr.va_nlink == 1) {
np->n_flag |= NREMOVED;
@@ -1891,11 +1913,11 @@ nfs_rename(v)
VN_KNOTE(fdvp, NOTE_WRITE);
VN_KNOTE(tdvp, NOTE_WRITE);
- if (fvp->v_type == VDIR) {
- if (tvp != NULL && tvp->v_type == VDIR)
- cache_purge(tdvp);
- cache_purge(fdvp);
- }
+ if (fvp->v_type == VDIR)
+ cache_purge(fvp); /* we need to flush DOTDOT entry */
+ else
+ cache_purge1(fdvp, fcnp, 0);
+ cache_purge1(tdvp, tcnp, 0);
out:
if (tdvp == tvp)
vrele(tdvp);
@@ -2162,6 +2184,8 @@ nfs_mkdir(v)
struct mbuf *mreq, *mrep, *md, *mb;
const int v3 = NFS_ISV3(dvp);
+ KASSERT(cnp->cn_nameiop == CREATE);
+
len = cnp->cn_namelen;
nfsstats.rpccnt[NFSPROC_MKDIR]++;
nfsm_reqhead(dnp, NFSPROC_MKDIR,
@@ -2210,8 +2234,7 @@ nfs_mkdir(v)
vput(newvp);
} else {
VN_KNOTE(dvp, NOTE_WRITE | NOTE_LINK);
- if (cnp->cn_flags & MAKEENTRY)
- nfs_cache_enter(dvp, newvp, cnp);
+ nfs_cache_enter(dvp, newvp, cnp);
*ap->a_vpp = newvp;
}
PNBUF_PUT(cnp->cn_pnbuf);
--NextPart-20040403232804-0245300--