tech-kern archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
rename(), wapbl and deadlock
Hi,
I got a look again at the ufs_rename patch that David Holland sent
at the end of september, and managed to get it working on netbsd-5.
Changes from his version (apart from the backport) that got it working:
- VOP_UNLOCK(ap->a_tvp) on entry, if not NULL. it's not used and we'll
do a lookup() to find it again.
- clear SAVESTART from from_name and to_name. otherwise relookup() will
VREF() the vnodes and we'll have a refcount leak (and so a vnode
leak).
- make sure to always vrele(illegal_vnode).
With these changes, I couldn't crash or hang a kernel using rsync
on a WAPBL filesystem. However, the system doen't come up multiuser
if / is not logged:
Building databases: devpanic: kernel diagnostic assertion "bn >= NDADDR"
failed: file "/dsk/l1/misc/bouyer/netbsd-5/src/sys/ufs/ufs/ufs_bmap.c", line 349
fatal breakpoint trap in supervisor mode
trap type 1 code 0 eip c05b947c cs 8 eflags 246 cr2 bbb74e84 ilevel 0
Stopped in pid 114.1 (dev_mkdb) at netbsd:breakpoint+0x4: popl %ebp
db{1}> tr
breakpoint(c0af5b1a,cc5f6768,c208d800,c04bee0e,0,0,c0457448,0,0,0) at
netbsd:breakpoint+0x4
panic(c0b03610,c0a675f1,c0a9f2ce,c0a9f2dc,15d,0,cc5f67dc,c044413d,c0a675f1,c0a9f2dc)
at netbsd:panic+0x1b0
__kernassert(c0a675f1,c0a9f2dc,15d,c0a9f2ce,c1f84d60,1,cc5f67bc,c0850696,ffffffff,ffffffff)
at netbsd:__kernassert+0x39
ufs_getlbns(cc61c968,ffffffff,ffffffff,cc5f6878,cc5f68b8,4,4,c04bee0e,0,0) at
netbsd:ufs_getlbns+0x1fd
ufs_bmaparray(cc61c968,ffffffff,ffffffff,c278ad18,0,0,0,c0443ef0,cc61c968,c278acec)
at netbsd:ufs_bmaparray+0x257
ufs_bmap(cc5f691c,cc5280e0,0,c0bf0a00,0,2000,c08a5280,cc61c968,ffffffff,ffffffff)
at netbsd:ufs_bmap+0x85
VOP_BMAP(cc61c968,ffffffff,ffffffff,0,c278ad18,0,cc681edc,cc61c968,cc61c968,0)
at netbsd:VOP_BMAP+0x7f
ufs_strategy(cc5f698c,2000,0,c04bee0e,0,0,c08a52c0,cc61c968,c278acec,c278acec)
at netbsd:ufs_strategy+0xb3
VOP_STRATEGY(cc61c968,c278acec,ffffffff,2000,0,0,0,cc5f6aa8,cc61c968,0) at
netbsd:VOP_STRATEGY+0x66
bio_doread(2000,ffffffff,0,0,cc681edc,d,1,ffffffff,cb2eee68,cb2ee8d0) at
netbsd:bio_doread+0x73
breadn(cc61c968,ffffffff,ffffffff,2000,cb2ee8d0,cb2eee68,0,ffffffff,1,cc5f6aa8)
at netbsd:breadn+0x47
ufs_blkatoff(cc61c968,fffffe30,ffffffff,cc5f6aec,cc5f6ae8,1,cc61c968,c04bf3da,8,c044b96d)
at netbsd:ufs_blkatoff+0x242
ufs_dirremove(cc61c968,cc69a9cc,8418,0,0,0,c04bcde3,cc681edc,cc681edc,8000) at
netbsd:ufs_dirremove+0x13b
ufs_rename(cc5f6bac,cc5280e0,6,c0be5d60,c27677e0,c0b23ec0,c08a4fc0,cc61c968,cc69bc48,cc5f6c90)
at netbsd:ufs_rename+0xb45
VOP_RENAME(cc61c968,cc69bc48,cc5f6c90,cc61c968,0,cc5f6c48,cc5f6c0c,c0850696,cc544354,0)
at netbsd:VOP_RENAME+0x7c
do_sys_rename(bfbfda5b,bfbfea5f,0,0,0,c0b22410,cc5f6d3c,c05c2798,cc5280e0,cc5f6d00)
at netbsd:do_sys_rename+0x5a9
sys_rename(cc5280e0,cc5f6d00,cc5f6d28,bbb74000,cb31fc6c,cb31fc6c,1,bfbfda5b,bfbfea5f,401)
at netbsd:sys_rename+0x26
but using the wapbl rename version for the non-wapbl seems to run stable.
I've not analysed whenever this is safe in case of crash though (and
won't say I can do it, I don't understand all what's going on in there).
Does anyone see a problem with this, or otherwise has comments ?
For now I only tested this on test boxes, I probably won't have time to do
more checks for the next 2 weeks.
--
Manuel Bouyer <bouyer%antioche.eu.org@localhost>
NetBSD: 26 ans d'experience feront toujours la difference
--
Index: ufs_extern.h
===================================================================
RCS file: /cvsroot/src/sys/ufs/ufs/ufs_extern.h,v
retrieving revision 1.60
diff -u -p -u -r1.60 ufs_extern.h
--- ufs_extern.h 31 May 2008 21:37:08 -0000 1.60
+++ ufs_extern.h 17 Dec 2009 21:15:23 -0000
@@ -129,7 +129,8 @@ int ufs_direnter(struct vnode *, struct
int ufs_dirremove(struct vnode *, struct inode *, int, int);
int ufs_dirrewrite(struct inode *, struct inode *, ino_t, int, int, int);
int ufs_dirempty(struct inode *, ino_t, kauth_cred_t);
-int ufs_checkpath(struct inode *, struct inode *, kauth_cred_t);
+int ufs_parentcheck(struct vnode *, struct vnode *, kauth_cred_t,
+ int *, struct vnode **);
int ufs_blkatoff(struct vnode *, off_t, char **, struct buf **, bool);
/* ufs_quota.c */
Index: ufs_lookup.c
===================================================================
RCS file: /cvsroot/src/sys/ufs/ufs/ufs_lookup.c,v
retrieving revision 1.99
diff -u -p -u -r1.99 ufs_lookup.c
--- ufs_lookup.c 31 Jul 2008 05:38:06 -0000 1.99
+++ ufs_lookup.c 17 Dec 2009 21:15:23 -0000
@@ -1248,6 +1248,7 @@ ufs_dirempty(struct inode *ip, ino_t par
return (1);
}
+#if 0 /* XXX remove ufs_checkpath() before commit */
/*
* Check if source directory is in the path of the target directory.
* Target is supplied locked, source is unlocked.
@@ -1320,6 +1321,127 @@ out:
vput(vp);
return (error);
}
+#endif
+
+/*
+ * Extract the inode number of ".." from a directory.
+ * Helper for ufs_parentcheck.
+ */
+static int
+ufs_readdotdot(struct vnode *vp, int needswap, kauth_cred_t cred, ino_t
*result)
+{
+ struct dirtemplate dirbuf;
+ int namlen, error;
+
+ error = vn_rdwr(UIO_READ, vp, &dirbuf,
+ sizeof (struct dirtemplate), (off_t)0, UIO_SYSSPACE,
+ IO_NODELOCKED, cred, NULL, NULL);
+ if (error) {
+ return error;
+ }
+
+#if (BYTE_ORDER == LITTLE_ENDIAN)
+ if (FSFMT(vp) && needswap == 0)
+ namlen = dirbuf.dotdot_type;
+ else
+ namlen = dirbuf.dotdot_namlen;
+#else
+ if (FSFMT(vp) && needswap != 0)
+ namlen = dirbuf.dotdot_type;
+ else
+ namlen = dirbuf.dotdot_namlen;
+#endif
+ if (namlen != 2 ||
+ dirbuf.dotdot_name[0] != '.' ||
+ dirbuf.dotdot_name[1] != '.') {
+ printf("ufs_readdotdot: directory %llu contains "
+ "garbage instead of ..\n",
+ (unsigned long long) VTOI(vp)->i_number);
+ return ENOTDIR;
+ }
+ *result = ufs_rw32(dirbuf.dotdot_ino, needswap);
+ return 0;
+}
+
+/*
+ * Check if LOWER is a descendent of UPPER. If we find UPPER, return
+ * nonzero in FOUND and return a reference to the immediate descendent
+ * of UPPER in UPPERCHILD. If we don't find UPPER (that is, if we
+ * reach the volume root and that isn't UPPER), return zero in FOUND
+ * and null in UPPERCHILD.
+ *
+ * Neither UPPER nor LOWER should be locked.
+ *
+ * On error (such as a permissions error checking up the directory
+ * tree) fail entirely.
+ */
+int
+ufs_parentcheck(struct vnode *upper, struct vnode *lower, kauth_cred_t cred,
+ int *found_ret, struct vnode **upperchild_ret)
+{
+ ino_t upper_ino, found_ino;
+ struct vnode *current, *next;
+ int error;
+
+ if (upper == lower) {
+ VREF(upper);
+ *found_ret = 1;
+ *upperchild_ret = upper;
+ return 0;
+ }
+ if (VTOI(lower)->i_number == ROOTINO) {
+ *found_ret = 0;
+ *upperchild_ret = NULL;
+ return 0;
+ }
+
+ upper_ino = VTOI(upper)->i_number;
+
+ current = lower;
+ VREF(current);
+ vn_lock(current, LK_EXCLUSIVE | LK_RETRY);
+
+ for (;;) {
+ const int needswap = UFS_MPNEEDSWAP(VTOI(current)->i_ump);
+ error = ufs_readdotdot(current, needswap, cred, &found_ino);
+ if (error) {
+ vput(current);
+ return error;
+ }
+ if (found_ino == upper_ino) {
+ VOP_UNLOCK(current, 0);
+ *found_ret = 1;
+ *upperchild_ret = current;
+ return 0;
+ }
+ if (found_ino == ROOTINO) {
+ vput(current);
+ *found_ret = 0;
+ *upperchild_ret = NULL;
+ return 0;
+ }
+ VOP_UNLOCK(current, 0);
+ error = VFS_VGET(current->v_mount, found_ino, &next);
+ if (error) {
+ vrele(current);
+ return error;
+ }
+ KASSERT(VOP_ISLOCKED(next));
+ if (next->v_type != VDIR) {
+ printf("ufs_parentcheck: inode %llu reached via .. of "
+ "inode %llu is not a directory\n",
+ (unsigned long long)VTOI(next)->i_number,
+ (unsigned long long)VTOI(current)->i_number);
+ vput(next);
+ vrele(current);
+ return ENOTDIR;
+ }
+ vrele(current);
+ current = next;
+ }
+
+ return 0;
+}
#define UFS_DIRRABLKS 0
int ufs_dirrablks = UFS_DIRRABLKS;
Index: ufs_vnops.c
===================================================================
RCS file: /cvsroot/src/sys/ufs/ufs/ufs_vnops.c,v
retrieving revision 1.169
diff -u -p -u -r1.169 ufs_vnops.c
--- ufs_vnops.c 14 Aug 2008 16:19:25 -0000 1.169
+++ ufs_vnops.c 17 Dec 2009 21:15:23 -0000
@@ -974,126 +974,489 @@ ufs_rename(void *v)
struct vnode *a_tvp;
struct componentname *a_tcnp;
} */ *ap = v;
- struct vnode *tvp, *tdvp, *fvp, *fdvp;
- struct componentname *tcnp, *fcnp;
- struct inode *ip, *xp, *dp;
+ struct vnode *fromparent_vnode;
+ struct inode *fromparent_i;
+ struct componentname *from_name;
+ struct vnode *fromchild_vnode;
+ struct inode *fromchild_i;
+
+ struct vnode *toparent_vnode;
+ struct inode *toparent_i;
+ struct componentname *to_name;
+ struct vnode *tochild_vnode;
+ struct inode *tochild_i;
+
+ struct vnode *illegal_vnode;
struct mount *mp;
struct direct *newdir;
- int doingdirectory, oldparent, newparent, error;
+ int error;
+ int oldparent, newparent;
+ int doingdirectory;
+ int foundfromparent;
+ int do_race_condition;
+ int noabort = 0;
#ifdef WAPBL
- if (ap->a_tdvp->v_mount->mnt_wapbl)
+ if ( /* ap->a_tdvp->v_mount->mnt_wapbl */ 1)
return wapbl_ufs_rename(v);
#endif
- tvp = ap->a_tvp;
- tdvp = ap->a_tdvp;
- fvp = ap->a_fvp;
- fdvp = ap->a_fdvp;
- tcnp = ap->a_tcnp;
- fcnp = ap->a_fcnp;
- doingdirectory = oldparent = newparent = error = 0;
+ /*
+ * Due to oddities in namei and in the fs-independent code,
+ * currently we are called with references to all four vnodes,
+ * but with only toparent_vnode locked.
+ *
+ * In a sane world we'd be passed just the two parent vnodes,
+ * both unlocked, and two (string) names.
+ *
+ * For now we will begin by setting up to mimic the sane world.
+ */
+
+ fromparent_vnode = ap->a_fdvp;
+ fromparent_i = VTOI(fromparent_vnode);
+
+ from_name = ap->a_fcnp;
+ fromchild_vnode = NULL;
+
+ toparent_vnode = ap->a_tdvp;
+ toparent_i = VTOI(toparent_vnode);
+
+ to_name = ap->a_tcnp;
+ tochild_vnode = NULL;
+
+ VOP_UNLOCK(toparent_vnode, 0);
+ if (ap->a_tvp)
+ VOP_UNLOCK(ap->a_tvp, 0);
+
+ illegal_vnode = NULL;
+ do_race_condition = 0;
+
+ /*
+ * Part 1: check for broken args and illegal operations.
+ */
#ifdef DIAGNOSTIC
- if ((tcnp->cn_flags & HASBUF) == 0 ||
- (fcnp->cn_flags & HASBUF) == 0)
+ /* We expect SAVESTART to have been set. */
+ if ((to_name->cn_flags & HASBUF) == 0 ||
+ (from_name->cn_flags & HASBUF) == 0)
panic("ufs_rename: no name");
#endif
- /*
- * Check for cross-device rename.
- */
- if ((fvp->v_mount != tdvp->v_mount) ||
- (tvp && (fvp->v_mount != tvp->v_mount))) {
+
+ /* Cross-device rename is prohibited. */
+ if (fromparent_vnode->v_mount != toparent_vnode->v_mount) {
error = EXDEV;
- abortit:
- VOP_ABORTOP(tdvp, tcnp); /* XXX, why not in NFS? */
- if (tdvp == tvp)
- vrele(tdvp);
- else
- vput(tdvp);
- if (tvp)
- vput(tvp);
- VOP_ABORTOP(fdvp, fcnp); /* XXX, why not in NFS? */
- vrele(fdvp);
- vrele(fvp);
- return (error);
+ goto fail_early;
+ }
+
+ /* If either parent isn't a directory, just give up now. */
+ if (fromparent_vnode->v_type != VDIR ||
+ toparent_vnode->v_type != VDIR) {
+ error = ENOTDIR;
+ goto fail_early;
+ }
+
+ /* The "." and ".." entries may not be renamed. */
+ if ((from_name->cn_namelen == 1 && from_name->cn_nameptr[0] == '.') ||
+ (from_name->cn_flags & ISDOTDOT) ||
+ (to_name->cn_flags & ISDOTDOT)) {
+ error = EINVAL;
+ goto fail_early;
}
/*
- * Check if just deleting a link name.
+ * Part 2: get locks.
+ *
+ * We lock parent vnodes before child vnodes. This means in
+ * particular that if A is above B in the directory tree then
+ * A must be locked before B. (This is true regardless of how
+ * many steps appear in between, because an arbitrary number
+ * of other processes could lock parent/child in between and
+ * establish a lock cycle and deadlock.)
+ *
+ * Therefore, if TOPARENT is above FROMPARENT we must lock
+ * TOPARENT first; if FROMPARENT is above TOPARENT we must
+ * lock FROMPARENT first; and if they're incommensurate it
+ * doesn't matter. (But, we rely on the fact that there's a
+ * whole-volume rename lock to prevent deadlock among groups
+ * of renames upon overlapping sets of incommensurate vnodes.)
+ *
+ * In addition to establishing lock ordering the parent check
+ * also serves to rule out cases where someone tries to move a
+ * directory underneath itself, e.g. rename("a/b", "a/b/c").
+ * If allowed to proceed such renames would detach portions of
+ * the directory tree and make fsck very unhappy.
+ *
+ * Note that it is an error for *FROMCHILD* to be above
+ * TOPARENT; however, *FROMPARENT* can be above TOPARENT, as
+ * in rename("a/b", "a/c/d").
+ *
+ * The parent check searches up the tree from TOPARENT until
+ * it either finds FROMPARENT or the root of the volume. It
+ * also returns the vnode it saw immediately before TOPARENT,
+ * if any. Later on (after looking up FROMCHILD) we will check
+ * to see if this *is* FROMCHILD and if so fail.
+ *
+ * If the parent check finds FROMPARENT, it means FROMPARENT
+ * is above TOPARENT, so we lock FROMPARENT first and then
+ * TOPARENT. Otherwise, either TOPARENT is above FROMPARENT or
+ * they're incommensurate and we lock TOPARENT first.
+ *
+ * In either case each of the child vnodes has to be looked up
+ * and locked immediately after its parent. The two cases
+ *
+ * FROMPARENT/FROMCHILD/TOPARENT/TOCHILD
+ * TOPARENT/TOCHILD/FROMPARENT/FROMCHILD
+ *
+ * can cause deadlock otherwise. (Note that both of these are
+ * error cases; the first fails the parent check and the
+ * second fails because TOCHILD isn't empty. The parent check
+ * case can be handled without attempting to lock FROMCHILD,
+ * (XXX or at least it could be in a saner world of namei),
+ * but the nonempty case requires locking TOCHILD to test.
+ *
+ * Therefore the procedure is either
+ *
+ * lock FROMPARENT
+ * lookup FROMCHILD
+ * lock FROMCHILD
+ * lock TOPARENT
+ * lookup TOCHILD
+ * lock TOCHILD
+ *
+ * or
+ *
+ * lock TOPARENT
+ * lookup TOCHILD
+ * lock TOCHILD
+ * lock FROMPARENT
+ * lookup FROMCHILD
+ * lock FROMCHILD
+ *
+ * In a saner namei world we could simplify this (some) by always
+ * handling FROMCHILD last, but that isn't currently possible.
+ *
+ * Note that for now we aren't doing lookup so much as relookup()
+ * and checking what we find against what was passed down from the
+ * fs-independent code.
+ *
+ * On top of all the above, just to make everything more
+ * exciting, any two of the vnodes might end up being the same.
+ *
+ * FROMPARENT == FROMCHILD mv a/. foo is an error.
+ * FROMPARENT == TOPARENT mv a/b a/c is ok.
+ * FROMPARENT == TOCHILD mv a/b/c a/b will give ENOTEMPTY.
+ * FROMCHILD == TOPARENT mv a/b a/b/c fails the parent check.
+ * FROMCHILD == TOCHILD mv a/b a/b is ok.
+ * TOPARENT == TOCHILD mv foo a/. is an error.
+ *
+ * This introduces more cases in the locking, because each
+ * distinct vnode must be locked exactly once.
+ *
+ * When FROMPARENT == TOPARENT and FROMCHILD != TOCHILD we
+ * assume it doesn't matter what order the children are locked
+ * in, because the per-volume rename lock excludes other
+ * renames and no other operation locks two files in the same
+ * directory at once. (Note: if it turns out that link() does,
+ * link() is wrong.)
*/
- if (tvp && ((VTOI(tvp)->i_flags & (IMMUTABLE | APPEND)) ||
- (VTOI(tdvp)->i_flags & APPEND))) {
- error = EPERM;
- goto abortit;
+
+ /*
+ * XXX until such time as we can do lookups without the namei
+ * and lookup machinery "helpfully" locking the result vnode
+ * for us, we can't avoid tripping on cases where FROMCHILD ==
+ * TOCHILD. The right way to do this is to check after looking
+ * the second one up and only lock it if it's different. Easy,
+ * but requires having the right underlying abstractions.
+ *
+ * Instead, we'll have to check the child vnode passed down in
+ * the argument block. For the branch where we look up
+ * FROMCHILD first we do this:
+ *
+ * - check if FROMCHILD is the same as the TOCHILD passed
+ * in (ap->a_tvp)
+ * - if so, unlock FROMCHILD before looking up TOCHILD
+ * and set do_race_condition
+ * - afterward if they aren't the same lock FROMCHILD again.
+ *
+ * Eww.
+ *
+ * On the plus side, this method at least shouldn't be able to
+ * deadlock when relocking FROMCHILD.
+ *
+ * Needless to say this is a race condition and should be
+ * fixed up once namei has been rendered sufficiently sane.
+ */
+
+ // XXX do we need this?
+ from_name->cn_flags &= ~SAVESTART;
+ to_name->cn_flags &= ~SAVESTART;
+ // XXX or this form instead?
+ //from_name->cn_flags &= ~(MODMASK | SAVESTART);
+ //from_name->cn_flags |= LOCKPARENT | LOCKLEAF;
+
+ if (fromparent_vnode == toparent_vnode) {
+ vn_lock(fromparent_vnode, LK_EXCLUSIVE | LK_RETRY);
+ error = relookup(fromparent_vnode, &fromchild_vnode,
+ from_name);
+ if (error) {
+ VOP_UNLOCK(fromparent_vnode, 0);
+ goto fail_nolocks;
+ }
+ /* XXX see note above */
+ if (fromchild_vnode == ap->a_tvp) {
+ VOP_UNLOCK(fromchild_vnode, 0);
+ do_race_condition = 1;
+ }
+ error = relookup(toparent_vnode, &tochild_vnode,
+ to_name);
+ if (error && error != ENOENT) {
+ /*VOP_UNLOCK(toparent_vnode); -- same as fromparent */
+ VOP_UNLOCK(fromchild_vnode, 0);
+ VOP_UNLOCK(fromparent_vnode, 0);
+ goto fail_nolocks;
+ }
+ if (error == ENOENT) {
+ /*
+ * Note: currently in this case relookup() succeeds
+ * and sets tochild_vnode = NULL, but this piece of
+ * logic will be wanted in the (saner) future.
+ * (XXX: remove this comment when appropriate)
+ */
+ tochild_vnode = NULL;
+ }
+ if (do_race_condition) {
+ if (tochild_vnode != fromchild_vnode) {
+ vn_lock(fromchild_vnode,
+ LK_EXCLUSIVE | LK_RETRY);
+ }
+ }
}
- if (fvp == tvp) {
- if (fvp->v_type == VDIR) {
- error = EINVAL;
- goto abortit;
+ else {
+ error = ufs_parentcheck(fromparent_vnode, toparent_vnode,
+ from_name->cn_cred,
+ &foundfromparent, &illegal_vnode);
+ if (error) {
+ goto fail_nolocks;
+ }
+ if (foundfromparent) {
+ vn_lock(fromparent_vnode, LK_EXCLUSIVE | LK_RETRY);
+ error = relookup(fromparent_vnode, &fromchild_vnode,
+ from_name);
+ if (error) {
+ VOP_UNLOCK(fromparent_vnode, 0);
+ goto fail_nolocks;
+ }
+ if (fromchild_vnode == toparent_vnode) {
+ VOP_UNLOCK(fromparent_vnode, 0);
+ VOP_UNLOCK(fromchild_vnode, 0);
+ error = EINVAL;
+ goto fail_nolocks;
+ }
+ vn_lock(toparent_vnode, LK_EXCLUSIVE | LK_RETRY);
+ /* XXX see note above */
+ if (fromchild_vnode == ap->a_tvp) {
+ VOP_UNLOCK(fromchild_vnode, 0);
+ do_race_condition = 1;
+ }
+ error = relookup(toparent_vnode, &tochild_vnode,
+ to_name);
+ if (error && error != ENOENT) {
+ VOP_UNLOCK(toparent_vnode, 0);
+ VOP_UNLOCK(fromchild_vnode, 0);
+ VOP_UNLOCK(fromparent_vnode, 0);
+ goto fail_nolocks;
+ }
+ if (error == ENOENT) {
+ tochild_vnode = NULL;
+ }
+ if (do_race_condition) {
+ if (tochild_vnode != fromchild_vnode) {
+ vn_lock(fromchild_vnode,
+ LK_EXCLUSIVE | LK_RETRY);
+ }
+ }
+ }
+ else {
+ vn_lock(toparent_vnode, LK_EXCLUSIVE | LK_RETRY);
+ error = relookup(toparent_vnode, &tochild_vnode,
+ to_name);
+ if (error && error != ENOENT) {
+ VOP_UNLOCK(toparent_vnode, 0);
+ goto fail_nolocks;
+ }
+ if (error == ENOENT) {
+ tochild_vnode = NULL;
+ }
+ if (fromparent_vnode == tochild_vnode) {
+ VOP_UNLOCK(tochild_vnode, 0);
+ VOP_UNLOCK(toparent_vnode, 0);
+ error = ENOTEMPTY;
+ goto fail_nolocks;
+ }
+ vn_lock(fromparent_vnode, LK_EXCLUSIVE | LK_RETRY);
+ /* XXX see note above */
+ if (tochild_vnode && tochild_vnode == ap->a_fvp) {
+ VOP_UNLOCK(tochild_vnode, 0);
+ do_race_condition = 1;
+ }
+ error = relookup(fromparent_vnode, &fromchild_vnode,
+ from_name);
+ if (error) {
+ VOP_UNLOCK(fromparent_vnode, 0);
+ VOP_UNLOCK(tochild_vnode, 0);
+ VOP_UNLOCK(toparent_vnode, 0);
+ goto fail_nolocks;
+ }
+ if (do_race_condition) {
+ if (fromchild_vnode != tochild_vnode) {
+ vn_lock(tochild_vnode,
+ LK_EXCLUSIVE | LK_RETRY);
+ }
+ }
}
+ }
- /* Release destination completely. */
- VOP_ABORTOP(tdvp, tcnp);
- vput(tdvp);
- vput(tvp);
+ /*
+ * Note: after relookup to get fromchild_vnode and
+ * tochild_vnode, we have extra references to (what may or may
+ * not be the same) vnodes in ap->a_fvp and ap->a_tvp. These
+ * are cleaned up at function exit below.
+ */
- /* Delete source. */
- vrele(fvp);
- fcnp->cn_flags &= ~(MODMASK | SAVESTART);
- fcnp->cn_flags |= LOCKPARENT | LOCKLEAF;
- fcnp->cn_nameiop = DELETE;
- vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY);
- if ((error = relookup(fdvp, &fvp, fcnp))) {
- vput(fdvp);
- return (error);
- }
- return (VOP_REMOVE(fdvp, fvp, fcnp));
+ /* Now that we have fromchild, make sure it passes the parent check */
+ if (fromchild_vnode == illegal_vnode) {
+ error = EINVAL;
+ goto fail_withlocks;
}
- if ((error = vn_lock(fvp, LK_EXCLUSIVE)) != 0)
- goto abortit;
- dp = VTOI(fdvp);
- ip = VTOI(fvp);
- if ((nlink_t) ip->i_nlink >= LINK_MAX) {
- VOP_UNLOCK(fvp, 0);
- error = EMLINK;
- goto abortit;
+ if (illegal_vnode)
+ vrele(illegal_vnode);
+ illegal_vnode = NULL;
+
+ /*
+ * All four vnodes are now locked, and we can proceed to do
+ * the real work.
+ */
+
+ KASSERT(VOP_ISLOCKED(fromparent_vnode));
+ KASSERT(VOP_ISLOCKED(fromchild_vnode));
+ KASSERT(VOP_ISLOCKED(toparent_vnode));
+ KASSERT(tochild_vnode == NULL || VOP_ISLOCKED(tochild_vnode));
+
+ /* Get the rest of the inodes. */
+ fromchild_i = VTOI(fromchild_vnode);
+ tochild_i = tochild_vnode ? VTOI(tochild_vnode) : NULL;
+
+ KASSERT(fromparent_i != fromchild_i);
+ KASSERT(tochild_i == NULL || toparent_i != tochild_i);
+
+ /*
+ * Part 3. More checks.
+ */
+
+ /* More cross-directory cases. */
+ if (fromparent_vnode->v_mount != fromchild_vnode->v_mount) {
+ error = EXDEV;
+ goto fail_withlocks;
+ }
+ if (tochild_vnode != NULL &&
+ toparent_vnode->v_mount != tochild_vnode->v_mount) {
+ error = EXDEV;
+ goto fail_withlocks;
}
- if ((ip->i_flags & (IMMUTABLE | APPEND)) ||
- (dp->i_flags & APPEND)) {
- VOP_UNLOCK(fvp, 0);
+
+ /* Check flags. */
+ if ((fromparent_i->i_flags & APPEND) != 0) {
error = EPERM;
- goto abortit;
+ goto fail_withlocks;
}
- if ((ip->i_mode & IFMT) == IFDIR) {
- /*
- * Avoid ".", "..", and aliases of "." for obvious reasons.
- */
- if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
- dp == ip ||
- (fcnp->cn_flags & ISDOTDOT) ||
- (tcnp->cn_flags & ISDOTDOT) ||
- (ip->i_flag & IN_RENAME)) {
- VOP_UNLOCK(fvp, 0);
- error = EINVAL;
- goto abortit;
- }
- ip->i_flag |= IN_RENAME;
- oldparent = dp->i_number;
- doingdirectory = 1;
+ if ((fromchild_i->i_flags & (IMMUTABLE | APPEND)) != 0) {
+ error = EPERM;
+ goto fail_withlocks;
+ }
+ if ((toparent_i->i_flags & APPEND) != 0) {
+ error = EPERM;
+ goto fail_withlocks;
+ }
+ if (tochild_vnode != NULL &&
+ (tochild_i->i_flags & (IMMUTABLE | APPEND)) != 0) {
+ error = EPERM;
+ goto fail_withlocks;
+ }
+
+ /* Moving a directory over itself is not allowed. */
+ if (fromchild_vnode == tochild_vnode &&
+ fromchild_vnode->v_type == VDIR) {
+ error = EINVAL;
+ goto fail_withlocks;
+ }
+
+ /* Make sure bumping the target's link count won't overflow. */
+ if ((nlink_t) fromchild_i->i_nlink >= LINK_MAX) {
+ error = EMLINK;
+ goto fail_withlocks;
+ }
+
+ /* This flag is now useless and should be removed. */
+ if (fromchild_i->i_flag & IN_RENAME) {
+ error = EINVAL;
+ goto fail_withlocks;
}
- VN_KNOTE(fdvp, NOTE_WRITE); /* XXXLUKEM/XXX: right place? */
/*
- * When the target exists, both the directory
- * and target vnodes are returned locked.
+ * Part 4. Rename logic.
*/
- dp = VTOI(tdvp);
- xp = NULL;
- if (tvp)
- xp = VTOI(tvp);
- mp = fdvp->v_mount;
+ /* If moving a file over itself, just unlink the from end. */
+ if (fromchild_vnode == tochild_vnode) {
+ /* This case was ruled out above. */
+ KASSERT(fromchild_vnode->v_type != VDIR);
+
+ /* Release to-side completely. */
+ VOP_ABORTOP(toparent_vnode, to_name);
+ if (toparent_vnode != fromparent_vnode) {
+ VOP_UNLOCK(toparent_vnode, 0);
+ }
+ vrele(toparent_vnode);
+ vrele(tochild_vnode);
+
+ /* Clean up a_fvp/a_tvp. */
+ vrele(ap->a_fvp);
+ if (ap->a_tvp != NULL) {
+ vrele(ap->a_tvp);
+ }
+
+ /* Now unlink the from-side. */
+ from_name->cn_flags &= ~(MODMASK | SAVESTART);
+ from_name->cn_flags |= LOCKPARENT | LOCKLEAF;
+ from_name->cn_nameiop = DELETE;
+ return VOP_REMOVE(fromparent_vnode, fromchild_vnode, from_name);
+ }
+
+ oldparent = fromparent_i->i_number;
+ newparent = toparent_i->i_number;
+ doingdirectory = (fromchild_i->i_mode & IFMT) == IFDIR;
+
+ /* XXX update the logic below to not test "newparent!=0" */
+ if (oldparent == newparent) {
+ newparent = 0;
+ }
+
+ /* Check permissions. */
+ error = VOP_ACCESS(fromchild_vnode, VWRITE, to_name->cn_cred);
+ if (error) {
+ goto fail_withlocks;
+ }
+
+ noabort = 1;
+
+ if (doingdirectory) {
+ fromchild_i->i_flag |= IN_RENAME;
+ }
+
+ /* XXXLUKEM/XXX: right place? */
+ VN_KNOTE(fromparent_vnode, NOTE_WRITE);
+
+ mp = fromparent_vnode->v_mount;
fstrans_start(mp, FSTRANS_SHARED);
/*
@@ -1102,62 +1465,27 @@ ufs_rename(void *v)
* completing our work, the link count
* may be wrong, but correctable.
*/
- ip->i_ffs_effnlink++;
- ip->i_nlink++;
- DIP_ASSIGN(ip, nlink, ip->i_nlink);
- ip->i_flag |= IN_CHANGE;
- if (DOINGSOFTDEP(fvp))
- softdep_change_linkcnt(ip);
- if ((error = UFS_UPDATE(fvp, NULL, NULL, UPDATE_DIROP)) != 0) {
- VOP_UNLOCK(fvp, 0);
- goto bad;
+ fromchild_i->i_ffs_effnlink++;
+ fromchild_i->i_nlink++;
+ DIP_ASSIGN(fromchild_i, nlink, fromchild_i->i_nlink);
+ fromchild_i->i_flag |= IN_CHANGE;
+ if (DOINGSOFTDEP(fromchild_vnode))
+ softdep_change_linkcnt(fromchild_i);
+ error = UFS_UPDATE(fromchild_vnode, NULL, NULL, UPDATE_DIROP);
+ if (error) {
+ goto fail_withfstrans;
}
/*
- * If ".." must be changed (ie the directory gets a new
- * parent) then the source directory must not be in the
- * directory hierarchy above the target, as this would
- * orphan everything below the source directory. Also
- * the user must have write permission in the source so
- * as to be able to change "..". We must repeat the call
- * to namei, as the parent directory is unlocked by the
- * call to checkpath().
- */
- error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred);
- VOP_UNLOCK(fvp, 0);
- if (oldparent != dp->i_number)
- newparent = dp->i_number;
- if (doingdirectory && newparent) {
- if (error) /* write access check above */
- goto bad;
- if (xp != NULL)
- vput(tvp);
- vref(tdvp); /* compensate for the ref checkpath loses */
- if ((error = ufs_checkpath(ip, dp, tcnp->cn_cred)) != 0) {
- vrele(tdvp);
- goto out;
- }
- tcnp->cn_flags &= ~SAVESTART;
- vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY);
- error = relookup(tdvp, &tvp, tcnp);
- if (error != 0) {
- vput(tdvp);
- goto out;
- }
- dp = VTOI(tdvp);
- xp = NULL;
- if (tvp)
- xp = VTOI(tvp);
- }
- /*
* 2) If target doesn't exist, link the target
* to the source and unlink the source.
* Otherwise, rewrite the target directory
* entry to reference the source inode and
* expunge the original entry's existence.
*/
- if (xp == NULL) {
- if (dp->i_dev != ip->i_dev)
+
+ if (tochild_i == NULL) {
+ if (toparent_i->i_dev != fromchild_i->i_dev)
panic("rename: EXDEV");
/*
* Account for ".." in new directory.
@@ -1165,53 +1493,53 @@ ufs_rename(void *v)
* parent we don't fool with the link count.
*/
if (doingdirectory && newparent) {
- if ((nlink_t)dp->i_nlink >= LINK_MAX) {
+ if ((nlink_t)toparent_i->i_nlink >= LINK_MAX) {
error = EMLINK;
- goto bad;
+ goto fail_withlinkcount;
}
- dp->i_ffs_effnlink++;
- dp->i_nlink++;
- DIP_ASSIGN(dp, nlink, dp->i_nlink);
- dp->i_flag |= IN_CHANGE;
- if (DOINGSOFTDEP(tdvp))
- softdep_change_linkcnt(dp);
- if ((error = UFS_UPDATE(tdvp, NULL, NULL,
+ toparent_i->i_ffs_effnlink++;
+ toparent_i->i_nlink++;
+ DIP_ASSIGN(toparent_i, nlink, toparent_i->i_nlink);
+ toparent_i->i_flag |= IN_CHANGE;
+ if (DOINGSOFTDEP(toparent_vnode))
+ softdep_change_linkcnt(toparent_i);
+ if ((error = UFS_UPDATE(toparent_vnode, NULL, NULL,
UPDATE_DIROP)) != 0) {
- dp->i_ffs_effnlink--;
- dp->i_nlink--;
- DIP_ASSIGN(dp, nlink, dp->i_nlink);
- dp->i_flag |= IN_CHANGE;
- if (DOINGSOFTDEP(tdvp))
- softdep_change_linkcnt(dp);
- goto bad;
+ toparent_i->i_ffs_effnlink--;
+ toparent_i->i_nlink--;
+ DIP_ASSIGN(toparent_i, nlink,
toparent_i->i_nlink);
+ toparent_i->i_flag |= IN_CHANGE;
+ if (DOINGSOFTDEP(toparent_vnode))
+ softdep_change_linkcnt(toparent_i);
+ goto fail_withlinkcount;
}
}
newdir = pool_cache_get(ufs_direct_cache, PR_WAITOK);
- ufs_makedirentry(ip, tcnp, newdir);
- error = ufs_direnter(tdvp, NULL, newdir, tcnp, NULL);
+ ufs_makedirentry(fromchild_i, to_name, newdir);
+ error = ufs_direnter(toparent_vnode, NULL, newdir, to_name,
NULL);
pool_cache_put(ufs_direct_cache, newdir);
if (error != 0) {
if (doingdirectory && newparent) {
- dp->i_ffs_effnlink--;
- dp->i_nlink--;
- DIP_ASSIGN(dp, nlink, dp->i_nlink);
- dp->i_flag |= IN_CHANGE;
- if (DOINGSOFTDEP(tdvp))
- softdep_change_linkcnt(dp);
- (void)UFS_UPDATE(tdvp, NULL, NULL,
- UPDATE_WAIT|UPDATE_DIROP);
+ toparent_i->i_ffs_effnlink--;
+ toparent_i->i_nlink--;
+ DIP_ASSIGN(toparent_i, nlink,
toparent_i->i_nlink);
+ toparent_i->i_flag |= IN_CHANGE;
+ if (DOINGSOFTDEP(toparent_vnode))
+ softdep_change_linkcnt(toparent_i);
+ (void)UFS_UPDATE(toparent_vnode, NULL, NULL,
+ UPDATE_WAIT | UPDATE_DIROP);
}
- goto bad;
+ goto fail_withlinkcount;
}
- VN_KNOTE(tdvp, NOTE_WRITE);
- vput(tdvp);
+ VN_KNOTE(toparent_vnode, NOTE_WRITE);
} else {
- if (xp->i_dev != dp->i_dev || xp->i_dev != ip->i_dev)
+ if (tochild_i->i_dev != toparent_i->i_dev ||
+ tochild_i->i_dev != fromchild_i->i_dev)
panic("rename: EXDEV");
/*
* Short circuit rename(foo, foo).
*/
- if (xp->i_number == ip->i_number)
+ if (tochild_i->i_number == fromchild_i->i_number)
panic("rename: same file");
/*
* If the parent directory is "sticky", then the user must
@@ -1219,49 +1547,51 @@ ufs_rename(void *v)
* otherwise the destination may not be changed (except by
* root). This implements append-only directories.
*/
- if ((dp->i_mode & S_ISTXT) &&
- kauth_authorize_generic(tcnp->cn_cred,
+ if ((toparent_i->i_mode & S_ISTXT) &&
+ kauth_authorize_generic(to_name->cn_cred,
KAUTH_GENERIC_ISSUSER, NULL) != 0 &&
- kauth_cred_geteuid(tcnp->cn_cred) != dp->i_uid &&
- xp->i_uid != kauth_cred_geteuid(tcnp->cn_cred)) {
+ kauth_cred_geteuid(to_name->cn_cred) != toparent_i->i_uid &&
+ tochild_i->i_uid != kauth_cred_geteuid(to_name->cn_cred)) {
error = EPERM;
- goto bad;
+ goto fail_withlinkcount;
}
/*
* Target must be empty if a directory and have no links
* to it. Also, ensure source and target are compatible
* (both directories, or both not directories).
*/
- if ((xp->i_mode & IFMT) == IFDIR) {
- if (xp->i_ffs_effnlink > 2 ||
- !ufs_dirempty(xp, dp->i_number, tcnp->cn_cred)) {
+ if ((tochild_i->i_mode & IFMT) == IFDIR) {
+ if (tochild_i->i_ffs_effnlink > 2 ||
+ !ufs_dirempty(tochild_i, toparent_i->i_number,
+ to_name->cn_cred)) {
error = ENOTEMPTY;
- goto bad;
+ goto fail_withlinkcount;
}
if (!doingdirectory) {
error = ENOTDIR;
- goto bad;
+ goto fail_withlinkcount;
}
- cache_purge(tdvp);
+ cache_purge(toparent_vnode);
} else if (doingdirectory) {
error = EISDIR;
- goto bad;
+ goto fail_withlinkcount;
}
- if ((error = ufs_dirrewrite(dp, xp, ip->i_number,
- IFTODT(ip->i_mode), doingdirectory && newparent ?
+ if ((error = ufs_dirrewrite(toparent_i, tochild_i,
+ fromchild_i->i_number,
+ IFTODT(fromchild_i->i_mode), doingdirectory && newparent ?
newparent : doingdirectory, IN_CHANGE | IN_UPDATE)) != 0)
- goto bad;
+ goto fail_withlinkcount;
if (doingdirectory) {
if (!newparent) {
- dp->i_ffs_effnlink--;
- if (DOINGSOFTDEP(tdvp))
- softdep_change_linkcnt(dp);
- }
- xp->i_ffs_effnlink--;
- if (DOINGSOFTDEP(tvp))
- softdep_change_linkcnt(xp);
+ toparent_i->i_ffs_effnlink--;
+ if (DOINGSOFTDEP(toparent_vnode))
+ softdep_change_linkcnt(toparent_i);
+ }
+ tochild_i->i_ffs_effnlink--;
+ if (DOINGSOFTDEP(tochild_vnode))
+ softdep_change_linkcnt(tochild_i);
}
- if (doingdirectory && !DOINGSOFTDEP(tvp)) {
+ if (doingdirectory && !DOINGSOFTDEP(tochild_vnode)) {
/*
* Truncate inode. The only stuff left in the directory
* is "." and "..". The "." reference is inconsequential
@@ -1274,48 +1604,24 @@ ufs_rename(void *v)
* them now.
*/
if (!newparent) {
- dp->i_nlink--;
- DIP_ASSIGN(dp, nlink, dp->i_nlink);
- dp->i_flag |= IN_CHANGE;
- }
- xp->i_nlink--;
- DIP_ASSIGN(xp, nlink, xp->i_nlink);
- xp->i_flag |= IN_CHANGE;
- if ((error = UFS_TRUNCATE(tvp, (off_t)0, IO_SYNC,
- tcnp->cn_cred)))
- goto bad;
- }
- VN_KNOTE(tdvp, NOTE_WRITE);
- vput(tdvp);
- VN_KNOTE(tvp, NOTE_DELETE);
- vput(tvp);
- xp = NULL;
+ toparent_i->i_nlink--;
+ DIP_ASSIGN(toparent_i, nlink,
toparent_i->i_nlink);
+ toparent_i->i_flag |= IN_CHANGE;
+ }
+ tochild_i->i_nlink--;
+ DIP_ASSIGN(tochild_i, nlink, tochild_i->i_nlink);
+ tochild_i->i_flag |= IN_CHANGE;
+ if ((error = UFS_TRUNCATE(tochild_vnode, (off_t)0,
+ IO_SYNC, to_name->cn_cred)))
+ goto fail_withlinkcount;
+ }
+ VN_KNOTE(toparent_vnode, NOTE_WRITE);
+ VN_KNOTE(tochild_vnode, NOTE_DELETE);
}
/*
* 3) Unlink the source.
*/
- fcnp->cn_flags &= ~(MODMASK | SAVESTART);
- fcnp->cn_flags |= LOCKPARENT | LOCKLEAF;
- vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY);
- if ((error = relookup(fdvp, &fvp, fcnp))) {
- vput(fdvp);
- vrele(ap->a_fvp);
- goto out2;
- }
- if (fvp != NULL) {
- xp = VTOI(fvp);
- dp = VTOI(fdvp);
- } else {
- /*
- * From name has disappeared.
- */
- if (doingdirectory)
- panic("rename: lost dir entry");
- vrele(ap->a_fvp);
- error = 0;
- goto out2;
- }
/*
* Ensure that the directory entry still exists and has not
* changed while the new name has been entered. If the source is
@@ -1325,58 +1631,89 @@ ufs_rename(void *v)
* flag ensures that it cannot be moved by another rename or removed
* by a rmdir.
*/
- if (xp != ip) {
- if (doingdirectory)
- panic("rename: lost dir entry");
- } else {
- /*
- * If the source is a directory with a
- * new parent, the link count of the old
- * parent directory must be decremented
- * and ".." set to point to the new parent.
- */
- if (doingdirectory && newparent) {
- KASSERT(dp != NULL);
- xp->i_offset = mastertemplate.dot_reclen;
- ufs_dirrewrite(xp, dp, newparent, DT_DIR, 0, IN_CHANGE);
- cache_purge(fdvp);
- }
- error = ufs_dirremove(fdvp, xp, fcnp->cn_flags, 0);
- xp->i_flag &= ~IN_RENAME;
- }
- VN_KNOTE(fvp, NOTE_RENAME);
- if (dp)
- vput(fdvp);
- if (xp)
- vput(fvp);
- vrele(ap->a_fvp);
- goto out2;
- /* exit routines from steps 1 & 2 */
- bad:
- if (xp)
- vput(ITOV(xp));
- vput(ITOV(dp));
- out:
- if (doingdirectory)
- ip->i_flag &= ~IN_RENAME;
- if (vn_lock(fvp, LK_EXCLUSIVE) == 0) {
- ip->i_ffs_effnlink--;
- ip->i_nlink--;
- DIP_ASSIGN(ip, nlink, ip->i_nlink);
- ip->i_flag |= IN_CHANGE;
- ip->i_flag &= ~IN_RENAME;
- if (DOINGSOFTDEP(fvp))
- softdep_change_linkcnt(ip);
- vput(fvp);
- } else
- vrele(fvp);
- vrele(fdvp);
+ /*
+ * If the source is a directory with a
+ * new parent, the link count of the old
+ * parent directory must be decremented
+ * and ".." set to point to the new parent.
+ */
+ if (doingdirectory && newparent) {
+ KASSERT(toparent_i != NULL);
+ fromchild_i->i_offset = mastertemplate.dot_reclen;
+ ufs_dirrewrite(fromchild_i, toparent_i,
+ newparent, DT_DIR, 0, IN_CHANGE);
+ cache_purge(fromparent_vnode);
+ }
+ error = ufs_dirremove(fromparent_vnode, fromchild_i,
+ from_name->cn_flags, 0);
+ VN_KNOTE(fromchild_vnode, NOTE_RENAME);
+
+ if (error) {
+ goto fail_withfstrans;
+ }
+ goto succeed;
- /* exit routines from step 3 */
- out2:
+fail_withlinkcount:
+ fromchild_i->i_nlink--;
+ fromchild_i->i_ffs_effnlink--;
+ DIP_ASSIGN(fromchild_i, nlink, fromchild_i->i_nlink);
+ fromchild_i->i_flag |= IN_CHANGE;
+ if (DOINGSOFTDEP(fromchild_vnode))
+ softdep_change_linkcnt(fromchild_i);
+
+fail_withfstrans:
+succeed:
fstrans_done(mp);
- return (error);
+ fromchild_i->i_flag &= ~IN_RENAME;
+
+fail_withlocks:
+ /* See comment up top about which of these can legally be the same. */
+ if (fromparent_vnode == toparent_vnode) {
+ VOP_UNLOCK(fromparent_vnode, 0);
+ } else {
+ VOP_UNLOCK(fromparent_vnode, 0);
+ VOP_UNLOCK(toparent_vnode, 0);
+ }
+ if (fromchild_vnode == tochild_vnode) {
+ VOP_UNLOCK(fromchild_vnode, 0);
+ } else {
+ VOP_UNLOCK(fromchild_vnode, 0);
+ if (tochild_vnode) {
+ VOP_UNLOCK(tochild_vnode, 0);
+ }
+ }
+
+fail_nolocks:
+ if (fromchild_vnode != NULL) {
+ vrele(fromchild_vnode);
+ fromchild_vnode = NULL;
+ }
+ if (tochild_vnode != NULL) {
+ vrele(tochild_vnode);
+ tochild_vnode = NULL;
+ }
+ if (illegal_vnode) {
+ vrele(illegal_vnode);
+ illegal_vnode = NULL;
+ }
+fail_early:
+ if (!noabort) {
+ /* XXX, why not in NFS? */
+ VOP_ABORTOP(fromparent_vnode, from_name);
+ VOP_ABORTOP(toparent_vnode, to_name);
+ }
+ vrele(fromparent_vnode);
+ vrele(toparent_vnode);
+
+ KASSERT(fromchild_vnode == NULL);
+ KASSERT(tochild_vnode == NULL);
+
+ vrele(ap->a_fvp);
+ if (ap->a_tvp != NULL)
+ vrele(ap->a_tvp);
+
+ return error;
}
int
Index: ufs_wapbl.c
===================================================================
RCS file: /cvsroot/src/sys/ufs/ufs/ufs_wapbl.c,v
retrieving revision 1.2.8.1
diff -u -p -u -r1.2.8.1 ufs_wapbl.c
--- ufs_wapbl.c 14 Dec 2008 11:56:04 -0000 1.2.8.1
+++ ufs_wapbl.c 17 Dec 2009 21:15:23 -0000
@@ -158,12 +158,27 @@ wapbl_ufs_rename(void *v)
struct vnode *a_tvp;
struct componentname *a_tcnp;
} */ *ap = v;
- struct vnode *tvp, *tdvp, *fvp, *fdvp;
- struct componentname *tcnp, *fcnp;
- struct inode *ip, *txp, *fxp, *tdp, *fdp;
+ struct vnode *fromparent_vnode;
+ struct inode *fromparent_i;
+ struct componentname *from_name;
+ struct vnode *fromchild_vnode;
+ struct inode *fromchild_i;
+
+ struct vnode *toparent_vnode;
+ struct inode *toparent_i;
+ struct componentname *to_name;
+ struct vnode *tochild_vnode;
+ struct inode *tochild_i;
+
+ struct vnode *illegal_vnode;
struct mount *mp;
struct direct *newdir;
- int doingdirectory, oldparent, newparent, error;
+ int error;
+ int oldparent, newparent;
+ int doingdirectory;
+ int foundfromparent;
+ int do_race_condition;
+ int noabort = 0;
int32_t saved_f_count;
doff_t saved_f_diroff;
@@ -175,224 +190,494 @@ wapbl_ufs_rename(void *v)
doff_t saved_t_offset;
u_int32_t saved_t_reclen;
- tvp = ap->a_tvp;
- tdvp = ap->a_tdvp;
- fvp = ap->a_fvp;
- fdvp = ap->a_fdvp;
- tcnp = ap->a_tcnp;
- fcnp = ap->a_fcnp;
- doingdirectory = oldparent = newparent = error = 0;
+ /*
+ * Due to oddities in namei and in the fs-independent code,
+ * currently we are called with references to all four vnodes,
+ * but with only toparent_vnode locked.
+ * In a sane world we'd be passed just the two parent vnodes,
+ * both unlocked, and two (string) names.
+ *
+ * For now we will begin by setting up to mimic the sane world.
+ */
+ fromparent_vnode = ap->a_fdvp;
+ fromparent_i = VTOI(fromparent_vnode);
+ from_name = ap->a_fcnp;
+ fromchild_vnode = NULL;
+
+ toparent_vnode = ap->a_tdvp;
+ toparent_i = VTOI(toparent_vnode);
+
+ to_name = ap->a_tcnp;
+ tochild_vnode = NULL;
+
+ VOP_UNLOCK(toparent_vnode, 0);
+ if (ap->a_tvp)
+ VOP_UNLOCK(ap->a_tvp, 0);
+
+ illegal_vnode = NULL;
+ do_race_condition = 0;
+
+ /* Because gcc 4.1 is not being real smart, zero all this */
+ saved_f_count = 0;
+ saved_f_diroff = 0;
+ saved_f_offset = 0;
+ saved_f_reclen = 0;
+ saved_t_count = 0;
+ saved_t_endoff = 0;
+ saved_t_diroff = 0;
+ saved_t_offset = 0;
+ saved_t_reclen = 0;
+
+ /*
+ * Part 1: check for broken args and illegal operations.
+ */
#ifdef DIAGNOSTIC
- if ((tcnp->cn_flags & HASBUF) == 0 ||
- (fcnp->cn_flags & HASBUF) == 0)
+ /* We expect SAVESTART to have been set. */
+ if ((to_name->cn_flags & HASBUF) == 0 ||
+ (from_name->cn_flags & HASBUF) == 0)
panic("ufs_rename: no name");
#endif
+ /* Cross-device rename is prohibited. */
+ if (fromparent_vnode->v_mount != toparent_vnode->v_mount) {
+ error = EXDEV;
+ goto fail_early;
+ }
+
+ /* If either parent isn't a directory, just give up now. */
+ if (fromparent_vnode->v_type != VDIR ||
+ toparent_vnode->v_type != VDIR) {
+ error = ENOTDIR;
+ goto fail_early;
+ }
+
+ /* The "." and ".." entries may not be renamed. */
+ if ((from_name->cn_namelen == 1 && from_name->cn_nameptr[0] == '.') ||
+ (from_name->cn_flags & ISDOTDOT) ||
+ (to_name->cn_flags & ISDOTDOT)) {
+ error = EINVAL;
+ goto fail_early;
+ }
+
+ /*
+ * Part 2: get locks.
+ *
+ * We lock parent vnodes before child vnodes. This means in
+ * particular that if A is above B in the directory tree then
+ * A must be locked before B. (This is true regardless of how
+ * many steps appear in between, because an arbitrary number
+ * of other processes could lock parent/child in between and
+ * establish a lock cycle and deadlock.)
+ *
+ * Therefore, if TOPARENT is above FROMPARENT we must lock
+ * TOPARENT first; if FROMPARENT is above TOPARENT we must
+ * lock FROMPARENT first; and if they're incommensurate it
+ * doesn't matter. (But, we rely on the fact that there's a
+ * whole-volume rename lock to prevent deadlock among groups
+ * of renames upon overlapping sets of incommensurate vnodes.)
+ *
+ * In addition to establishing lock ordering the parent check
+ * also serves to rule out cases where someone tries to move a
+ * directory underneath itself, e.g. rename("a/b", "a/b/c").
+ * If allowed to proceed such renames would detach portions of
+ * the directory tree and make fsck very unhappy.
+ *
+ * Note that it is an error for *FROMCHILD* to be above
+ * TOPARENT; however, *FROMPARENT* can be above TOPARENT, as
+ * in rename("a/b", "a/c/d").
+ *
+ * The parent check searches up the tree from TOPARENT until
+ * it either finds FROMPARENT or the root of the volume. It
+ * also returns the vnode it saw immediately before TOPARENT,
+ * if any. Later on (after looking up FROMCHILD) we will check
+ * to see if this *is* FROMCHILD and if so fail.
+ *
+ * If the parent check finds FROMPARENT, it means FROMPARENT
+ * is above TOPARENT, so we lock FROMPARENT first and then
+ * TOPARENT. Otherwise, either TOPARENT is above FROMPARENT or
+ * they're incommensurate and we lock TOPARENT first.
+ *
+ * In either case each of the child vnodes has to be looked up
+ * and locked immediately after its parent. The two cases
+ *
+ * FROMPARENT/FROMCHILD/TOPARENT/TOCHILD
+ * TOPARENT/TOCHILD/FROMPARENT/FROMCHILD
+ *
+ * can cause deadlock otherwise. (Note that both of these are
+ * error cases; the first fails the parent check and the
+ * second fails because TOCHILD isn't empty. The parent check
+ * case can be handled without attempting to lock FROMCHILD,
+ * (XXX or at least it could be in a saner world of namei),
+ * but the nonempty case requires locking TOCHILD to test.
+ * Therefore the procedure is either
+ *
+ * lock FROMPARENT
+ * lookup FROMCHILD
+ * lock FROMCHILD
+ * lock TOPARENT
+ * lookup TOCHILD
+ * lock TOCHILD
+ *
+ * or
+ *
+ * lock TOPARENT
+ * lookup TOCHILD
+ * lock TOCHILD
+ * lock FROMPARENT
+ * lookup FROMCHILD
+ * lock FROMCHILD
+ * In a saner namei world we could simplify this (some) by always
+ * handling FROMCHILD last, but that isn't currently possible.
+ *
+ * Note that for now we aren't doing lookup so much as relookup()
+ * and checking what we find against what was passed down from the
+ * fs-independent code.
+ * On top of all the above, just to make everything more
+ * exciting, any two of the vnodes might end up being the same.
+ *
+ * FROMPARENT == FROMCHILD mv a/. foo is an error.
+ * FROMPARENT == TOPARENT mv a/b a/c is ok.
+ * FROMPARENT == TOCHILD mv a/b/c a/b will give ENOTEMPTY.
+ * FROMCHILD == TOPARENT mv a/b a/b/c fails the parent check.
+ * FROMCHILD == TOCHILD mv a/b a/b is ok.
+ * TOPARENT == TOCHILD mv foo a/. is an error.
+ * This introduces more cases in the locking, because each
+ * distinct vnode must be locked exactly once.
+ *
+ * When FROMPARENT == TOPARENT and FROMCHILD != TOCHILD we
+ * assume it doesn't matter what order the children are locked
+ * in, because the per-volume rename lock excludes other
+ * renames and no other operation locks two files in the same
+ * directory at once. (Note: if it turns out that link() does,
+ * link() is wrong.)
+ */
+
+ /*
+ * XXX until such time as we can do lookups without the namei
+ * and lookup machinery "helpfully" locking the result vnode
+ * for us, we can't avoid tripping on cases where FROMCHILD ==
+ * TOCHILD. The right way to do this is to check after looking
+ * the second one up and only lock it if it's different. Easy,
+ * but requires having the right underlying abstractions.
+ *
+ * Instead, we'll have to check the child vnode passed down in
+ * the argument block. For the branch where we look up
+ * FROMCHILD first we do this:
+ *
+ * - check if FROMCHILD is the same as the TOCHILD passed
+ * in (ap->a_tvp)
+ * - if so, unlock FROMCHILD before looking up TOCHILD
+ * and set do_race_condition
+ * - afterward if they aren't the same lock FROMCHILD again.
+ *
+ * Eww.
+ *
+ * On the plus side, this method at least shouldn't be able to
+ * deadlock when relocking FROMCHILD.
+ *
+ * Needless to say this is a race condition and should be
+ * fixed up once namei has been rendered sufficiently sane.
+ */
+ // XXX do we need this?
+ from_name->cn_flags &= ~SAVESTART;
+ to_name->cn_flags &= ~SAVESTART;
+ // XXX or this form instead?
+ //from_name->cn_flags &= ~(MODMASK | SAVESTART);
+ //from_name->cn_flags |= LOCKPARENT | LOCKLEAF;
+
+ if (fromparent_vnode == toparent_vnode) {
+ vn_lock(fromparent_vnode, LK_EXCLUSIVE | LK_RETRY);
+ error = relookup(fromparent_vnode, &fromchild_vnode,
+ from_name);
+ if (error) {
+ VOP_UNLOCK(fromparent_vnode, 0);
+ goto fail_nolocks;
+ }
+
+ /*
+ * save directory lookup information
+ */
+ saved_f_count = fromparent_i->i_count;
+ saved_f_diroff = fromparent_i->i_diroff;
+ saved_f_offset = fromparent_i->i_offset;
+ saved_f_reclen = fromparent_i->i_reclen;
+
+ /* XXX see note above */
+ if (fromchild_vnode == ap->a_tvp) {
+ VOP_UNLOCK(fromchild_vnode, 0);
+ do_race_condition = 1;
+ }
+ error = relookup(toparent_vnode, &tochild_vnode,
+ to_name);
+ if (error && error != ENOENT) {
+ /*VOP_UNLOCK(toparent_vnode); -- same as fromparent */
+ VOP_UNLOCK(fromchild_vnode, 0);
+ VOP_UNLOCK(fromparent_vnode, 0);
+ goto fail_nolocks;
+ }
+ if (error == ENOENT) {
+ /*
+ * Note: currently in this case relookup() succeeds
+ * and sets tochild_vnode = NULL, but this piece of
+ * logic will be wanted in the (saner) future.
+ * (XXX: remove this comment when appropriate)
+ */
+ tochild_vnode = NULL;
+ }
+ if (do_race_condition) {
+ if (tochild_vnode != fromchild_vnode) {
+ vn_lock(fromchild_vnode,
+ LK_EXCLUSIVE | LK_RETRY);
+ }
+ }
+ /*
+ * save directory lookup information
+ */
+ saved_t_count = toparent_i->i_count;
+ saved_t_endoff = toparent_i->i_endoff;
+ saved_t_diroff = toparent_i->i_diroff;
+ saved_t_offset = toparent_i->i_offset;
+ saved_t_reclen = toparent_i->i_reclen;
+ }
+ else {
+ error = ufs_parentcheck(fromparent_vnode, toparent_vnode,
+ from_name->cn_cred,
+ &foundfromparent, &illegal_vnode);
+ if (error) {
+ goto fail_nolocks;
+ }
+ if (foundfromparent) {
+ vn_lock(fromparent_vnode, LK_EXCLUSIVE | LK_RETRY);
+ error = relookup(fromparent_vnode, &fromchild_vnode,
+ from_name);
+ if (error) {
+ VOP_UNLOCK(fromparent_vnode, 0);
+ goto fail_nolocks;
+ }
+ if (fromchild_vnode == toparent_vnode) {
+ VOP_UNLOCK(fromparent_vnode, 0);
+ VOP_UNLOCK(fromchild_vnode, 0);
+ error = EINVAL;
+ goto fail_nolocks;
+ }
+ vn_lock(toparent_vnode, LK_EXCLUSIVE | LK_RETRY);
+ /* XXX see note above */
+ if (fromchild_vnode == ap->a_tvp) {
+ VOP_UNLOCK(fromchild_vnode, 0);
+ do_race_condition = 1;
+ }
+ error = relookup(toparent_vnode, &tochild_vnode,
+ to_name);
+ if (error && error != ENOENT) {
+ VOP_UNLOCK(toparent_vnode, 0);
+ VOP_UNLOCK(fromchild_vnode, 0);
+ VOP_UNLOCK(fromparent_vnode, 0);
+ goto fail_nolocks;
+ }
+ if (error == ENOENT) {
+ tochild_vnode = NULL;
+ }
+ if (do_race_condition) {
+ if (tochild_vnode != fromchild_vnode) {
+ vn_lock(fromchild_vnode,
+ LK_EXCLUSIVE | LK_RETRY);
+ }
+ }
+ }
+ else {
+ vn_lock(toparent_vnode, LK_EXCLUSIVE | LK_RETRY);
+ error = relookup(toparent_vnode, &tochild_vnode,
+ to_name);
+ if (error && error != ENOENT) {
+ VOP_UNLOCK(toparent_vnode, 0);
+ goto fail_nolocks;
+ }
+ if (error == ENOENT) {
+ tochild_vnode = NULL;
+ }
+ if (fromparent_vnode == tochild_vnode) {
+ VOP_UNLOCK(tochild_vnode, 0);
+ VOP_UNLOCK(toparent_vnode, 0);
+ error = ENOTEMPTY;
+ goto fail_nolocks;
+ }
+ vn_lock(fromparent_vnode, LK_EXCLUSIVE | LK_RETRY);
+ /* XXX see note above */
+ if (tochild_vnode && tochild_vnode == ap->a_fvp) {
+ VOP_UNLOCK(tochild_vnode, 0);
+ do_race_condition = 1;
+ }
+ error = relookup(fromparent_vnode, &fromchild_vnode,
+ from_name);
+ if (error) {
+ VOP_UNLOCK(fromparent_vnode, 0);
+ VOP_UNLOCK(tochild_vnode, 0);
+ VOP_UNLOCK(toparent_vnode, 0);
+ goto fail_nolocks;
+ }
+ if (do_race_condition) {
+ if (fromchild_vnode != tochild_vnode) {
+ vn_lock(tochild_vnode,
+ LK_EXCLUSIVE | LK_RETRY);
+ }
+ }
+ }
+ }
/*
- * Check for cross-device rename.
+ * Note: after relookup to get fromchild_vnode and
+ * tochild_vnode, we have extra references to (what may or may
+ * not be the same) vnodes in ap->a_fvp and ap->a_tvp. These
+ * are cleaned up at function exit below.
*/
- if ((fvp->v_mount != tdvp->v_mount) ||
- (tvp && (fvp->v_mount != tvp->v_mount))) {
- error = EXDEV;
- abortit:
- VOP_ABORTOP(tdvp, tcnp); /* XXX, why not in NFS? */
- if (tdvp == tvp)
- vrele(tdvp);
- else
- vput(tdvp);
- if (tvp)
- vput(tvp);
- VOP_ABORTOP(fdvp, fcnp); /* XXX, why not in NFS? */
- vrele(fdvp);
- vrele(fvp);
- return (error);
+
+ /* Now that we have fromchild, make sure it passes the parent check */
+ if (fromchild_vnode == illegal_vnode) {
+ error = EINVAL;
+ goto fail_withlocks;
}
+ if (illegal_vnode)
+ vrele(illegal_vnode);
+ illegal_vnode = NULL;
/*
- * Check if just deleting a link name.
+ * All four vnodes are now locked, and we can proceed to do
+ * the real work.
*/
- if (tvp && ((VTOI(tvp)->i_flags & (IMMUTABLE | APPEND)) ||
- (VTOI(tdvp)->i_flags & APPEND))) {
+
+ KASSERT(VOP_ISLOCKED(fromparent_vnode));
+ KASSERT(VOP_ISLOCKED(fromchild_vnode));
+ KASSERT(VOP_ISLOCKED(toparent_vnode));
+ KASSERT(tochild_vnode == NULL || VOP_ISLOCKED(tochild_vnode));
+
+ /* Get the rest of the inodes. */
+ fromchild_i = VTOI(fromchild_vnode);
+ tochild_i = tochild_vnode ? VTOI(tochild_vnode) : NULL;
+
+ KASSERT(fromparent_i != fromchild_i);
+ KASSERT(tochild_i == NULL || toparent_i != tochild_i);
+ /*
+ * Part 3. More checks.
+ */
+
+ /* More cross-directory cases. */
+ if (fromparent_vnode->v_mount != fromchild_vnode->v_mount) {
+ error = EXDEV;
+ goto fail_withlocks;
+ }
+ if (tochild_vnode != NULL &&
+ toparent_vnode->v_mount != tochild_vnode->v_mount) {
+ error = EXDEV;
+ goto fail_withlocks;
+ }
+
+ /* Check flags. */
+ if ((fromparent_i->i_flags & APPEND) != 0) {
error = EPERM;
- goto abortit;
+ goto fail_withlocks;
}
- if (fvp == tvp) {
- if (fvp->v_type == VDIR) {
- error = EINVAL;
- goto abortit;
- }
-
- /* Release destination completely. */
- VOP_ABORTOP(tdvp, tcnp);
- vput(tdvp);
- vput(tvp);
-
- /* Delete source. */
- vrele(fvp);
- fcnp->cn_flags &= ~(MODMASK | SAVESTART);
- fcnp->cn_flags |= LOCKPARENT | LOCKLEAF;
- fcnp->cn_nameiop = DELETE;
- vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY);
- if ((error = relookup(fdvp, &fvp, fcnp))) {
- vput(fdvp);
- return (error);
- }
- return (VOP_REMOVE(fdvp, fvp, fcnp));
- }
- if ((error = vn_lock(fvp, LK_EXCLUSIVE)) != 0)
- goto abortit;
- fdp = VTOI(fdvp);
- ip = VTOI(fvp);
- if ((nlink_t) ip->i_nlink >= LINK_MAX) {
- VOP_UNLOCK(fvp, 0);
- error = EMLINK;
- goto abortit;
+ if ((fromchild_i->i_flags & (IMMUTABLE | APPEND)) != 0) {
+ error = EPERM;
+ goto fail_withlocks;
}
- if ((ip->i_flags & (IMMUTABLE | APPEND)) ||
- (fdp->i_flags & APPEND)) {
- VOP_UNLOCK(fvp, 0);
+ if ((toparent_i->i_flags & APPEND) != 0) {
error = EPERM;
- goto abortit;
+ goto fail_withlocks;
+ }
+ if (tochild_vnode != NULL &&
+ (tochild_i->i_flags & (IMMUTABLE | APPEND)) != 0) {
+ error = EPERM;
+ goto fail_withlocks;
}
- if ((ip->i_mode & IFMT) == IFDIR) {
- /*
- * Avoid ".", "..", and aliases of "." for obvious reasons.
- */
- if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
- fdp == ip ||
- (fcnp->cn_flags & ISDOTDOT) ||
- (tcnp->cn_flags & ISDOTDOT) ||
- (ip->i_flag & IN_RENAME)) {
- VOP_UNLOCK(fvp, 0);
- error = EINVAL;
- goto abortit;
- }
- ip->i_flag |= IN_RENAME;
- doingdirectory = 1;
- }
- oldparent = fdp->i_number;
- VN_KNOTE(fdvp, NOTE_WRITE); /* XXXLUKEM/XXX: right place? */
-
- /*
- * When the target exists, both the directory
- * and target vnodes are returned locked.
- */
- tdp = VTOI(tdvp);
- txp = NULL;
- if (tvp)
- txp = VTOI(tvp);
- mp = fdvp->v_mount;
- fstrans_start(mp, FSTRANS_SHARED);
+ /* Moving a directory over itself is not allowed. */
+ if (fromchild_vnode == tochild_vnode &&
+ fromchild_vnode->v_type == VDIR) {
+ error = EINVAL;
+ goto fail_withlocks;
+ }
- /*
- * If ".." must be changed (ie the directory gets a new
- * parent) then the source directory must not be in the
- * directory hierarchy above the target, as this would
- * orphan everything below the source directory. Also
- * the user must have write permission in the source so
- * as to be able to change "..". We must repeat the call
- * to namei, as the parent directory is unlocked by the
- * call to checkpath().
- */
- error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred);
- VOP_UNLOCK(fvp, 0);
- if (oldparent != tdp->i_number)
- newparent = tdp->i_number;
- if (doingdirectory && newparent) {
- if (error) /* write access check above */
- goto out;
- if (txp != NULL)
- vput(tvp);
- txp = NULL;
- vref(tdvp); /* compensate for the ref checkpath loses */
- if ((error = ufs_checkpath(ip, tdp, tcnp->cn_cred)) != 0) {
- vrele(tdvp);
- tdp = NULL;
- goto out;
- }
- tcnp->cn_flags &= ~SAVESTART;
- tdp = NULL;
- vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY);
- error = relookup(tdvp, &tvp, tcnp);
- if (error != 0) {
- vput(tdvp);
- goto out;
- }
- tdp = VTOI(tdvp);
- if (tvp)
- txp = VTOI(tvp);
+ /* Make sure bumping the target's link count won't overflow. */
+ if ((nlink_t) fromchild_i->i_nlink >= LINK_MAX) {
+ error = EMLINK;
+ goto fail_withlocks;
+ }
+
+ /* This flag is now useless and should be removed. */
+ if (fromchild_i->i_flag & IN_RENAME) {
+ error = EINVAL;
+ goto fail_withlocks;
}
/*
- * XXX handle case where fdvp is parent of tdvp,
- * by unlocking tdvp and regrabbing it with vget after?
+ * Part 4. Rename logic.
*/
- /* save directory lookup information in case tdvp == fdvp */
- saved_t_count = tdp->i_count;
- saved_t_endoff = tdp->i_endoff;
- saved_t_diroff = tdp->i_diroff;
- saved_t_offset = tdp->i_offset;
- saved_t_reclen = tdp->i_reclen;
+ /* If moving a file over itself, just unlink the from end. */
+ if (fromchild_vnode == tochild_vnode) {
+ /* This case was ruled out above. */
+ KASSERT(fromchild_vnode->v_type != VDIR);
- /*
- * This was moved up to before the journal lock to
- * avoid potential deadlock
- */
- fcnp->cn_flags &= ~(MODMASK | SAVESTART);
- fcnp->cn_flags |= LOCKPARENT | LOCKLEAF;
- if (newparent) {
- /* Check for the rename("foo/foo", "foo") case. */
- if (fdvp == tvp) {
- error = doingdirectory ? ENOTEMPTY : EISDIR;
- goto out;
+ /* Release to-side completely. */
+ VOP_ABORTOP(toparent_vnode, to_name);
+ if (toparent_vnode != fromparent_vnode) {
+ VOP_UNLOCK(toparent_vnode, 0);
}
- vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY);
- if ((error = relookup(fdvp, &fvp, fcnp))) {
- vput(fdvp);
- vrele(ap->a_fvp);
- goto out2;
- }
- } else {
- error = VOP_LOOKUP(fdvp, &fvp, fcnp);
- if (error && (error != EJUSTRETURN)) {
- vrele(ap->a_fvp);
- goto out2;
- }
- error = 0;
- }
- if (fvp != NULL) {
- fxp = VTOI(fvp);
- fdp = VTOI(fdvp);
- } else {
- /*
- * From name has disappeared.
- */
- if (doingdirectory)
- panic("rename: lost dir entry");
+ vrele(toparent_vnode);
+ vrele(tochild_vnode);
+
+ /* Clean up a_fvp/a_tvp. */
vrele(ap->a_fvp);
- error = ENOENT; /* XXX ufs_rename sets "0" here */
- goto out2;
+ if (ap->a_tvp != NULL) {
+ vrele(ap->a_tvp);
+ }
+
+ /* Now unlink the from-side. */
+ from_name->cn_flags &= ~(MODMASK | SAVESTART);
+ from_name->cn_flags |= LOCKPARENT | LOCKLEAF;
+ from_name->cn_nameiop = DELETE;
+ return VOP_REMOVE(fromparent_vnode, fromchild_vnode, from_name);
+ }
+
+ oldparent = fromparent_i->i_number;
+ newparent = toparent_i->i_number;
+ doingdirectory = (fromchild_i->i_mode & IFMT) == IFDIR;
+
+ /* XXX update the logic below to not test "newparent!=0" */
+ if (oldparent == newparent) {
+ newparent = 0;
+ }
+ /* Check permissions. */
+ error = VOP_ACCESS(fromchild_vnode, VWRITE, to_name->cn_cred);
+ if (error) {
+ goto fail_withlocks;
}
- vrele(ap->a_fvp);
- /* save directory lookup information in case tdvp == fdvp */
- saved_f_count = fdp->i_count;
- saved_f_diroff = fdp->i_diroff;
- saved_f_offset = fdp->i_offset;
- saved_f_reclen = fdp->i_reclen;
-
- /* restore directory lookup information in case tdvp == fdvp */
- tdp->i_offset = saved_t_offset;
- tdp->i_reclen = saved_t_reclen;
- tdp->i_count = saved_t_count;
- tdp->i_endoff = saved_t_endoff;
- tdp->i_diroff = saved_t_diroff;
+ noabort = 1;
- error = UFS_WAPBL_BEGIN(fdvp->v_mount);
+ if (doingdirectory) {
+ fromchild_i->i_flag |= IN_RENAME;
+ }
+ /* XXXLUKEM/XXX: right place? */
+ VN_KNOTE(fromparent_vnode, NOTE_WRITE);
+
+ mp = fromparent_vnode->v_mount;
+ fstrans_start(mp, FSTRANS_SHARED);
+
+ error = UFS_WAPBL_BEGIN(fromparent_vnode->v_mount);
if (error)
- goto out2;
+ goto fail_withfstrans;
+
+ /*
+ * restore directory lookup information in case toparent_vnode
+ * == fromparent_vnode
+ */
+ if (fromparent_vnode == toparent_vnode) {
+ toparent_i->i_offset = saved_t_offset;
+ toparent_i->i_reclen = saved_t_reclen;
+ toparent_i->i_count = saved_t_count;
+ toparent_i->i_endoff = saved_t_endoff;
+ toparent_i->i_diroff = saved_t_diroff;
+ }
/*
* 1) Bump link count while we're moving stuff
@@ -400,14 +685,15 @@ wapbl_ufs_rename(void *v)
* completing our work, the link count
* may be wrong, but correctable.
*/
- ip->i_ffs_effnlink++;
- ip->i_nlink++;
- DIP_ASSIGN(ip, nlink, ip->i_nlink);
- ip->i_flag |= IN_CHANGE;
- if (DOINGSOFTDEP(fvp))
- softdep_change_linkcnt(ip);
- if ((error = UFS_UPDATE(fvp, NULL, NULL, UPDATE_DIROP)) != 0) {
- goto bad;
+ fromchild_i->i_ffs_effnlink++;
+ fromchild_i->i_nlink++;
+ DIP_ASSIGN(fromchild_i, nlink, fromchild_i->i_nlink);
+ fromchild_i->i_flag |= IN_CHANGE;
+ if (DOINGSOFTDEP(fromchild_vnode))
+ softdep_change_linkcnt(fromchild_i);
+ error = UFS_UPDATE(fromchild_vnode, NULL, NULL, UPDATE_DIROP);
+ if (error) {
+ goto fail_withwapbl;
}
/*
@@ -417,8 +703,9 @@ wapbl_ufs_rename(void *v)
* entry to reference the source inode and
* expunge the original entry's existence.
*/
- if (txp == NULL) {
- if (tdp->i_dev != ip->i_dev)
+
+ if (tochild_i == NULL) {
+ if (toparent_i->i_dev != fromchild_i->i_dev)
panic("rename: EXDEV");
/*
* Account for ".." in new directory.
@@ -426,52 +713,53 @@ wapbl_ufs_rename(void *v)
* parent we don't fool with the link count.
*/
if (doingdirectory && newparent) {
- if ((nlink_t)tdp->i_nlink >= LINK_MAX) {
+ if ((nlink_t)toparent_i->i_nlink >= LINK_MAX) {
error = EMLINK;
- goto bad;
+ goto fail_withlinkcount;
}
- tdp->i_ffs_effnlink++;
- tdp->i_nlink++;
- DIP_ASSIGN(tdp, nlink, tdp->i_nlink);
- tdp->i_flag |= IN_CHANGE;
- if (DOINGSOFTDEP(tdvp))
- softdep_change_linkcnt(tdp);
- if ((error = UFS_UPDATE(tdvp, NULL, NULL,
+ toparent_i->i_ffs_effnlink++;
+ toparent_i->i_nlink++;
+ DIP_ASSIGN(toparent_i, nlink, toparent_i->i_nlink);
+ toparent_i->i_flag |= IN_CHANGE;
+ if (DOINGSOFTDEP(toparent_vnode))
+ softdep_change_linkcnt(toparent_i);
+ if ((error = UFS_UPDATE(toparent_vnode, NULL, NULL,
UPDATE_DIROP)) != 0) {
- tdp->i_ffs_effnlink--;
- tdp->i_nlink--;
- DIP_ASSIGN(tdp, nlink, tdp->i_nlink);
- tdp->i_flag |= IN_CHANGE;
- if (DOINGSOFTDEP(tdvp))
- softdep_change_linkcnt(tdp);
- goto bad;
+ toparent_i->i_ffs_effnlink--;
+ toparent_i->i_nlink--;
+ DIP_ASSIGN(toparent_i, nlink,
toparent_i->i_nlink);
+ toparent_i->i_flag |= IN_CHANGE;
+ if (DOINGSOFTDEP(toparent_vnode))
+ softdep_change_linkcnt(toparent_i);
+ goto fail_withlinkcount;
}
}
newdir = pool_cache_get(ufs_direct_cache, PR_WAITOK);
- ufs_makedirentry(ip, tcnp, newdir);
- error = ufs_direnter(tdvp, NULL, newdir, tcnp, NULL);
+ ufs_makedirentry(fromchild_i, to_name, newdir);
+ error = ufs_direnter(toparent_vnode, NULL, newdir, to_name,
NULL);
pool_cache_put(ufs_direct_cache, newdir);
if (error != 0) {
if (doingdirectory && newparent) {
- tdp->i_ffs_effnlink--;
- tdp->i_nlink--;
- DIP_ASSIGN(tdp, nlink, tdp->i_nlink);
- tdp->i_flag |= IN_CHANGE;
- if (DOINGSOFTDEP(tdvp))
- softdep_change_linkcnt(tdp);
- (void)UFS_UPDATE(tdvp, NULL, NULL,
+ toparent_i->i_ffs_effnlink--;
+ toparent_i->i_nlink--;
+ DIP_ASSIGN(toparent_i, nlink,
toparent_i->i_nlink);
+ toparent_i->i_flag |= IN_CHANGE;
+ if (DOINGSOFTDEP(toparent_vnode))
+ softdep_change_linkcnt(toparent_i);
+ (void)UFS_UPDATE(toparent_vnode, NULL, NULL,
UPDATE_WAIT | UPDATE_DIROP);
}
- goto bad;
+ goto fail_withlinkcount;
}
- VN_KNOTE(tdvp, NOTE_WRITE);
+ VN_KNOTE(toparent_vnode, NOTE_WRITE);
} else {
- if (txp->i_dev != tdp->i_dev || txp->i_dev != ip->i_dev)
+ if (tochild_i->i_dev != toparent_i->i_dev ||
+ tochild_i->i_dev != fromchild_i->i_dev)
panic("rename: EXDEV");
/*
* Short circuit rename(foo, foo).
*/
- if (txp->i_number == ip->i_number)
+ if (tochild_i->i_number == fromchild_i->i_number)
panic("rename: same file");
/*
* If the parent directory is "sticky", then the user must
@@ -479,49 +767,51 @@ wapbl_ufs_rename(void *v)
* otherwise the destination may not be changed (except by
* root). This implements append-only directories.
*/
- if ((tdp->i_mode & S_ISTXT) &&
- kauth_authorize_generic(tcnp->cn_cred,
+ if ((toparent_i->i_mode & S_ISTXT) &&
+ kauth_authorize_generic(to_name->cn_cred,
KAUTH_GENERIC_ISSUSER, NULL) != 0 &&
- kauth_cred_geteuid(tcnp->cn_cred) != tdp->i_uid &&
- txp->i_uid != kauth_cred_geteuid(tcnp->cn_cred)) {
+ kauth_cred_geteuid(to_name->cn_cred) != toparent_i->i_uid &&
+ tochild_i->i_uid != kauth_cred_geteuid(to_name->cn_cred)) {
error = EPERM;
- goto bad;
+ goto fail_withlinkcount;
}
/*
* Target must be empty if a directory and have no links
* to it. Also, ensure source and target are compatible
* (both directories, or both not directories).
*/
- if ((txp->i_mode & IFMT) == IFDIR) {
- if (txp->i_ffs_effnlink > 2 ||
- !ufs_dirempty(txp, tdp->i_number, tcnp->cn_cred)) {
+ if ((tochild_i->i_mode & IFMT) == IFDIR) {
+ if (tochild_i->i_ffs_effnlink > 2 ||
+ !ufs_dirempty(tochild_i, toparent_i->i_number,
+ to_name->cn_cred)) {
error = ENOTEMPTY;
- goto bad;
+ goto fail_withlinkcount;
}
if (!doingdirectory) {
error = ENOTDIR;
- goto bad;
+ goto fail_withlinkcount;
}
- cache_purge(tdvp);
+ cache_purge(toparent_vnode);
} else if (doingdirectory) {
error = EISDIR;
- goto bad;
+ goto fail_withlinkcount;
}
- if ((error = ufs_dirrewrite(tdp, txp, ip->i_number,
- IFTODT(ip->i_mode), doingdirectory && newparent ?
+ if ((error = ufs_dirrewrite(toparent_i, tochild_i,
+ fromchild_i->i_number,
+ IFTODT(fromchild_i->i_mode), doingdirectory && newparent ?
newparent : doingdirectory, IN_CHANGE | IN_UPDATE)) != 0)
- goto bad;
+ goto fail_withlinkcount;
if (doingdirectory) {
if (!newparent) {
- tdp->i_ffs_effnlink--;
- if (DOINGSOFTDEP(tdvp))
- softdep_change_linkcnt(tdp);
- }
- txp->i_ffs_effnlink--;
- if (DOINGSOFTDEP(tvp))
- softdep_change_linkcnt(txp);
+ toparent_i->i_ffs_effnlink--;
+ if (DOINGSOFTDEP(toparent_vnode))
+ softdep_change_linkcnt(toparent_i);
+ }
+ tochild_i->i_ffs_effnlink--;
+ if (DOINGSOFTDEP(tochild_vnode))
+ softdep_change_linkcnt(tochild_i);
}
- if (doingdirectory && !DOINGSOFTDEP(tvp)) {
+ if (doingdirectory && !DOINGSOFTDEP(tochild_vnode)) {
/*
* Truncate inode. The only stuff left in the directory
* is "." and "..". The "." reference is inconsequential
@@ -534,34 +824,39 @@ wapbl_ufs_rename(void *v)
* them now.
*/
if (!newparent) {
- tdp->i_nlink--;
- DIP_ASSIGN(tdp, nlink, tdp->i_nlink);
- tdp->i_flag |= IN_CHANGE;
- UFS_WAPBL_UPDATE(tdvp, NULL, NULL, 0);
- }
- txp->i_nlink--;
- DIP_ASSIGN(txp, nlink, txp->i_nlink);
- txp->i_flag |= IN_CHANGE;
- if ((error = UFS_TRUNCATE(tvp, (off_t)0, IO_SYNC,
- tcnp->cn_cred)))
- goto bad;
- }
- VN_KNOTE(tdvp, NOTE_WRITE);
- VN_KNOTE(tvp, NOTE_DELETE);
- }
-
- /* restore directory lookup information in case tdvp == fdvp */
- fdp->i_offset = saved_f_offset;
- fdp->i_reclen = saved_f_reclen;
- fdp->i_count = saved_f_count;
- fdp->i_diroff = saved_f_diroff;
+ toparent_i->i_nlink--;
+ DIP_ASSIGN(toparent_i, nlink,
toparent_i->i_nlink);
+ toparent_i->i_flag |= IN_CHANGE;
+ UFS_WAPBL_UPDATE(toparent_vnode, NULL, NULL, 0);
+ }
+ tochild_i->i_nlink--;
+ DIP_ASSIGN(tochild_i, nlink, tochild_i->i_nlink);
+ tochild_i->i_flag |= IN_CHANGE;
+ if ((error = UFS_TRUNCATE(tochild_vnode, (off_t)0,
+ IO_SYNC, to_name->cn_cred)))
+ goto fail_withlinkcount;
+ }
+ VN_KNOTE(toparent_vnode, NOTE_WRITE);
+ VN_KNOTE(tochild_vnode, NOTE_DELETE);
+ }
+
+ /*
+ * restore directory lookup information in case toparent_vnode
+ * == fromparent_vnode
+ */
+ if (fromparent_vnode == toparent_vnode) {
+ fromparent_i->i_offset = saved_f_offset;
+ fromparent_i->i_reclen = saved_f_reclen;
+ fromparent_i->i_count = saved_f_count;
+ fromparent_i->i_diroff = saved_f_diroff;
+ }
/*
* Handle case where the directory we need to remove may have
* been moved when the directory insertion above performed compaction.
* or when i_count may be wrong due to insertion before this entry.
*/
- if ((tdp->i_number == fdp->i_number) &&
+ if ((toparent_i->i_number == fromparent_i->i_number) &&
(((saved_f_offset >= saved_t_offset) &&
(saved_f_offset < saved_t_offset + saved_t_count)) ||
((saved_f_offset - saved_f_count >= saved_t_offset) &&
@@ -569,7 +864,7 @@ wapbl_ufs_rename(void *v)
saved_t_offset + saved_t_count)))) {
struct buf *bp;
struct direct *ep;
- struct ufsmount *ump = fdp->i_ump;
+ struct ufsmount *ump = fromparent_i->i_ump;
doff_t endsearch; /* offset to end directory search */
int dirblksiz = ump->um_dirblksiz;
const int needswap = UFS_MPNEEDSWAP(ump);
@@ -577,37 +872,38 @@ wapbl_ufs_rename(void *v)
int namlen, entryoffsetinblock;
char *dirbuf;
- bmask = fdvp->v_mount->mnt_stat.f_iosize - 1;
+ bmask = fromparent_vnode->v_mount->mnt_stat.f_iosize - 1;
/*
- * the fcnp entry will be somewhere between the start of
+ * the from_name entry will be somewhere between the start of
* compaction and the original location.
*/
- fdp->i_offset = saved_t_offset;
- error = ufs_blkatoff(fdvp, (off_t)fdp->i_offset, &dirbuf, &bp,
- false);
+ fromparent_i->i_offset = saved_t_offset;
+ error = ufs_blkatoff(fromparent_vnode,
+ (off_t)fromparent_i->i_offset, &dirbuf, &bp, false);
if (error)
- goto bad;
+ goto fail_withlinkcount;
/*
- * keep existing fdp->i_count in case
- * compaction started at the same location as the fcnp entry.
+ * keep existing fromparent_i->i_count in case
+ * compaction started at the same location as the from_name
entry.
*/
endsearch = saved_f_offset + saved_f_reclen;
entryoffsetinblock = 0;
- while (fdp->i_offset < endsearch) {
+ while (fromparent_i->i_offset < endsearch) {
int reclen;
/*
* If necessary, get the next directory block.
*/
- if ((fdp->i_offset & bmask) == 0) {
+ if ((fromparent_i->i_offset & bmask) == 0) {
if (bp != NULL)
brelse(bp, 0);
- error = ufs_blkatoff(fdvp, (off_t)fdp->i_offset,
- &dirbuf, &bp, false);
+ error = ufs_blkatoff(fromparent_vnode,
+ (off_t)fromparent_i->i_offset, &dirbuf, &bp,
+ false);
if (error)
- goto bad;
+ goto fail_withlinkcount;
entryoffsetinblock = 0;
}
@@ -616,37 +912,38 @@ wapbl_ufs_rename(void *v)
reclen = ufs_rw16(ep->d_reclen, needswap);
#if (BYTE_ORDER == LITTLE_ENDIAN)
- if (FSFMT(fdvp) && needswap == 0)
+ if (FSFMT(fromparent_vnode) && needswap == 0)
namlen = ep->d_type;
else
namlen = ep->d_namlen;
#else
- if (FSFMT(fdvp) && needswap != 0)
+ if (FSFMT(fromparent_vnode) && needswap != 0)
namlen = ep->d_type;
else
namlen = ep->d_namlen;
#endif
if ((ep->d_ino != 0) &&
(ufs_rw32(ep->d_ino, needswap) != WINO) &&
- (namlen == fcnp->cn_namelen) &&
- memcmp(ep->d_name, fcnp->cn_nameptr, namlen) == 0) {
- fdp->i_reclen = reclen;
+ (namlen == from_name->cn_namelen) &&
+ memcmp(ep->d_name, from_name->cn_nameptr, namlen)
== 0) {
+ fromparent_i->i_reclen = reclen;
break;
}
- fdp->i_offset += reclen;
- fdp->i_count = reclen;
+ fromparent_i->i_offset += reclen;
+ fromparent_i->i_count = reclen;
entryoffsetinblock += reclen;
}
- KASSERT(fdp->i_offset <= endsearch);
+ KASSERT(fromparent_i->i_offset <= endsearch);
/*
- * If fdp->i_offset points to start of a directory block,
- * set fdp->i_count so ufs_dirremove() doesn't compact over
- * a directory block boundary.
+ * If fromparent_i->i_offset points to start of a
+ * directory block, set fromparent_i->i_count so
+ * ufs_dirremove() doesn't compact over a directory
+ * block boundary.
*/
- if ((fdp->i_offset & (dirblksiz - 1)) == 0)
- fdp->i_count = 0;
+ if ((fromparent_i->i_offset & (dirblksiz - 1)) == 0)
+ fromparent_i->i_count = 0;
brelse(bp, 0);
}
@@ -663,68 +960,96 @@ wapbl_ufs_rename(void *v)
* flag ensures that it cannot be moved by another rename or removed
* by a rmdir.
*/
- if (fxp != ip) {
- if (doingdirectory)
- panic("rename: lost dir entry");
+
+ /*
+ * If the source is a directory with a
+ * new parent, the link count of the old
+ * parent directory must be decremented
+ * and ".." set to point to the new parent.
+ */
+ if (doingdirectory && newparent) {
+ KASSERT(toparent_i != NULL);
+ fromchild_i->i_offset = mastertemplate.dot_reclen;
+ ufs_dirrewrite(fromchild_i, toparent_i,
+ newparent, DT_DIR, 0, IN_CHANGE);
+ cache_purge(fromparent_vnode);
+ }
+ error = ufs_dirremove(fromparent_vnode, fromchild_i,
+ from_name->cn_flags, 0);
+ VN_KNOTE(fromchild_vnode, NOTE_RENAME);
+
+ if (error) {
+ goto fail_withwapbl;
+ }
+ goto succeed;
+
+fail_withlinkcount:
+ fromchild_i->i_ffs_effnlink--;
+ fromchild_i->i_nlink--;
+ DIP_ASSIGN(fromchild_i, nlink, fromchild_i->i_nlink);
+ fromchild_i->i_flag |= IN_CHANGE;
+ UFS_WAPBL_UPDATE(fromchild_vnode, NULL, NULL, 0);
+ if (DOINGSOFTDEP(fromchild_vnode))
+ softdep_change_linkcnt(fromchild_i);
+
+fail_withwapbl:
+succeed:
+ UFS_WAPBL_END(fromparent_vnode->v_mount);
+
+fail_withfstrans:
+ fstrans_done(mp);
+ fromchild_i->i_flag &= ~IN_RENAME;
+
+fail_withlocks:
+ /* See comment up top about which of these can legally be the same. */
+ if (fromparent_vnode == toparent_vnode) {
+ VOP_UNLOCK(fromparent_vnode, 0);
} else {
- /*
- * If the source is a directory with a
- * new parent, the link count of the old
- * parent directory must be decremented
- * and ".." set to point to the new parent.
- */
- if (doingdirectory && newparent) {
- KASSERT(fdp != NULL);
- fxp->i_offset = mastertemplate.dot_reclen;
- ufs_dirrewrite(fxp, fdp, newparent, DT_DIR, 0,
IN_CHANGE);
- cache_purge(fdvp);
- }
- error = ufs_dirremove(fdvp, fxp, fcnp->cn_flags, 0);
- fxp->i_flag &= ~IN_RENAME;
- }
- VN_KNOTE(fvp, NOTE_RENAME);
- goto done;
-
- out:
- vrele(fvp);
- vrele(fdvp);
- goto out2;
-
- /* exit routines from steps 1 & 2 */
- bad:
- if (doingdirectory)
- ip->i_flag &= ~IN_RENAME;
- ip->i_ffs_effnlink--;
- ip->i_nlink--;
- DIP_ASSIGN(ip, nlink, ip->i_nlink);
- ip->i_flag |= IN_CHANGE;
- ip->i_flag &= ~IN_RENAME;
- UFS_WAPBL_UPDATE(fvp, NULL, NULL, 0);
- if (DOINGSOFTDEP(fvp))
- softdep_change_linkcnt(ip);
- done:
- UFS_WAPBL_END(fdvp->v_mount);
- vput(fdvp);
- vput(fvp);
- out2:
- /*
- * clear IN_RENAME - some exit paths happen too early to go
- * through the cleanup done in the "bad" case above, so we
- * always do this mini-cleanup here.
- */
- ip->i_flag &= ~IN_RENAME;
-
- if (txp)
- vput(ITOV(txp));
- if (tdp) {
- if (newparent)
- vput(ITOV(tdp));
- else
- vrele(ITOV(tdp));
+ VOP_UNLOCK(fromparent_vnode, 0);
+ VOP_UNLOCK(toparent_vnode, 0);
+ }
+ if (fromchild_vnode == tochild_vnode) {
+ VOP_UNLOCK(fromchild_vnode, 0);
+ } else {
+ VOP_UNLOCK(fromchild_vnode, 0);
+ if (tochild_vnode) {
+ VOP_UNLOCK(tochild_vnode, 0);
+ }
+ }
+fail_nolocks:
+ if (fromchild_vnode != NULL) {
+ vrele(fromchild_vnode);
+ fromchild_vnode = NULL;
+ }
+ if (tochild_vnode != NULL) {
+ vrele(tochild_vnode);
+ tochild_vnode = NULL;
+ }
+ if (illegal_vnode) {
+ vrele(illegal_vnode);
+ illegal_vnode = NULL;
}
- fstrans_done(mp);
- return (error);
+fail_early:
+ if (!noabort) {
+ /* XXX, why not in NFS? */
+ VOP_ABORTOP(fromparent_vnode, from_name);
+ VOP_ABORTOP(toparent_vnode, to_name);
+ }
+
+ vrele(fromparent_vnode);
+ vrele(toparent_vnode);
+
+ KASSERT(fromchild_vnode == NULL);
+ KASSERT(tochild_vnode == NULL);
+ KASSERT(illegal_vnode == NULL);
+
+ vrele(ap->a_fvp);
+ if (ap->a_tvp != NULL) {
+ vrele(ap->a_tvp);
+ }
+
+ return error;
}
#ifdef WAPBL_DEBUG_INODES
Home |
Main Index |
Thread Index |
Old Index