tech-kern archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Re: [PATCH] Re: Locking in src/sys/dev/fss.c



On Fri, Aug 09, 2024 at 07:29:52PM +0000, Taylor R Campbell wrote:
> I don't know what the locking protocol is, but any changes to it
> absolutely must be accompanied by comments identifying the lock order
> and, if applicable, any rules like `forbidden in softint'.

Right, I go for not changing it. 

> I don't understand, how did you come to this conclusion?

Now I retry to reproduce the problem and I cannot get it
to show up. But I was hit by it a long time ago, I should
have check what I recalled before starting this async
sapshot creation thread thing.

Let us forget about the snapshot creatin thread, it seems there
are many locks missing. Attached patch tries to address that.

I wonder if I should not add a vfs_busy/vfs_unbusy in such situation:
        mount = vp->v_mount;
(...)
        vrele(vp);
(...)
        check for another vnode's ->v_mount == mount

-- 
Emmanuel Dreyfus
manu%netbsd.org@localhost
Index: fss.c
===================================================================
RCS file: /cvsroot/src/sys/dev/fss.c,v
retrieving revision 1.114
diff -U4 -r1.114 fss.c
--- fss.c	22 Mar 2023 21:14:46 -0000	1.114
+++ fss.c	10 Aug 2024 00:28:37 -0000
@@ -219,9 +219,12 @@
 		if (sc == NULL) {
 			mutex_exit(&fss_device_lock);
 			return ENOMEM;
 		}
+
+		mutex_enter(&sc->sc_slock);
 		sc->sc_state = FSS_IDLE;
+		mutex_exit(&sc->sc_slock);
 	}
 
 	mutex_enter(&sc->sc_slock);
 
@@ -359,11 +362,19 @@
 			error = EPERM;
 		if (error == 0 && sc->sc_state != FSS_IDLE) {
 			error = EBUSY;
 		} else {
+			char mntname[MNAMELEN];
+
 			sc->sc_state = FSS_CREATING;
-			copyinstr(fss->fss_mount, sc->sc_mntname,
-			    sizeof(sc->sc_mntname), NULL);
+			mutex_exit(&sc->sc_slock);
+
+			copyinstr(fss->fss_mount, mntname,
+			    sizeof(mntname), NULL);
+
+			mutex_enter(&sc->sc_slock);
+			memcpy(sc->sc_mntname, mntname,
+			    sizeof(sc->sc_mntname));
 			memset(&sc->sc_time, 0, sizeof(sc->sc_time));
 			sc->sc_clshift = 0;
 		}
 		mutex_exit(&sc->sc_slock);
@@ -701,13 +712,13 @@
 	struct timespec ts;
 	/* distinguish lookup 1 from lookup 2 to reduce mistakes */
 	struct pathbuf *pb2;
 	struct vnode *vp, *vp2;
+	struct mount *mount;
 
 	/*
 	 * Get the mounted file system.
 	 */
-
 	error = namei_simple_user(fss->fss_mount,
 				NSM_FOLLOW_NOEMULROOT, &vp);
 	if (error != 0)
 		return error;
@@ -716,10 +727,13 @@
 		vrele(vp);
 		return EINVAL;
 	}
 
-	sc->sc_mount = vp->v_mount;
-	memcpy(sc->sc_mntname, sc->sc_mount->mnt_stat.f_mntonname, MNAMELEN);
+	mount = vp->v_mount;
+	mutex_enter(&sc->sc_slock);
+	sc->sc_mount = mount;
+	memcpy(sc->sc_mntname, mount->mnt_stat.f_mntonname, MNAMELEN);
+	mutex_exit(&sc->sc_slock);
 
 	vrele(vp);
 
 	/*
@@ -730,23 +744,29 @@
 				NSM_FOLLOW_NOEMULROOT, &vp);
 	if (error != 0)
 		return error;
 
-	if (vp->v_type == VREG && vp->v_mount == sc->sc_mount) {
+	mutex_enter(&sc->sc_slock);
+	if (vp->v_type == VREG && vp->v_mount == mount) {
 		sc->sc_flags |= FSS_PERSISTENT;
 		sc->sc_bs_vp = vp;
 
-		fsbsize = sc->sc_bs_vp->v_mount->mnt_stat.f_iosize;
+		fsbsize = mount->mnt_stat.f_iosize;
 		bits = sizeof(sc->sc_bs_bshift)*NBBY;
+
 		for (sc->sc_bs_bshift = 1; sc->sc_bs_bshift < bits;
 		    sc->sc_bs_bshift++)
 			if (FSS_FSBSIZE(sc) == fsbsize)
 				break;
-		if (sc->sc_bs_bshift >= bits)
+
+		if (sc->sc_bs_bshift >= bits) {
+			mutex_exit(&sc->sc_slock);
 			return EINVAL;
+		}
 
 		sc->sc_bs_bmask = FSS_FSBSIZE(sc)-1;
 		sc->sc_clshift = 0;
+		mutex_exit(&sc->sc_slock);
 
 		if ((fss->fss_flags & FSS_UNLINK_ON_CREATE) != 0) {
 			error = do_sys_unlink(fss->fss_bstore, UIO_USERSPACE);
 			if (error)
@@ -754,12 +774,14 @@
 		}
 		error = vn_lock(vp, LK_EXCLUSIVE);
 		if (error != 0)
 			return error;
-		error = VFS_SNAPSHOT(sc->sc_mount, sc->sc_bs_vp, &ts);
+		error = VFS_SNAPSHOT(mount, vp, &ts);
+		mutex_enter(&sc->sc_slock);
 		TIMESPEC_TO_TIMEVAL(&sc->sc_time, &ts);
+		mutex_exit(&sc->sc_slock);
 
-		VOP_UNLOCK(sc->sc_bs_vp);
+		VOP_UNLOCK(vp);
 
 		return error;
 	}
 	vrele(vp);
@@ -767,12 +789,15 @@
 	/*
 	 * Get the block device it is mounted on and its size.
 	 */
 
-	error = spec_node_lookup_by_mount(sc->sc_mount, &vp);
+	error = spec_node_lookup_by_mount(mount, &vp);
 	if (error)
 		return error;
+
+	mutex_enter(&sc->sc_slock);
 	sc->sc_bdev = vp->v_rdev;
+	mutex_exit(&sc->sc_slock);
 
 	error = getdisksize(vp, &numsec, &secsize);
 	vrele(vp);
 	if (error)
@@ -784,19 +809,21 @@
 	 * Get the backing store
 	 */
 
 	error = pathbuf_copyin(fss->fss_bstore, &pb2);
-	if (error) {
+	if (error)
  		return error;
-	}
+
 	error = vn_open(NULL, pb2, 0, FREAD|FWRITE, 0, &vp2, NULL, NULL);
 	if (error != 0) {
 		pathbuf_destroy(pb2);
 		return error;
 	}
 	VOP_UNLOCK(vp2);
 
+	mutex_enter(&sc->sc_slock);
 	sc->sc_bs_vp = vp2;
+	mutex_exit(&sc->sc_slock);
 
 	if (vp2->v_type != VREG && vp2->v_type != VCHR) {
 		vrele(vp2);
 		pathbuf_destroy(pb2);
@@ -808,23 +835,30 @@
 		error = do_sys_unlink(fss->fss_bstore, UIO_USERSPACE);
 		if (error)
 			return error;
 	}
+
+	mutex_enter(&sc->sc_slock);
 	if (sc->sc_bs_vp->v_type == VREG) {
 		fsbsize = sc->sc_bs_vp->v_mount->mnt_stat.f_iosize;
-		if (fsbsize & (fsbsize-1))	/* No power of two */
+		if (fsbsize & (fsbsize-1)) {	/* No power of two */
+			mutex_exit(&sc->sc_slock);
 			return EINVAL;
+		}
 		for (sc->sc_bs_bshift = 1; sc->sc_bs_bshift < 32;
 		    sc->sc_bs_bshift++)
 			if (FSS_FSBSIZE(sc) == fsbsize)
 				break;
-		if (sc->sc_bs_bshift >= 32)
+		if (sc->sc_bs_bshift >= 32) {
+			mutex_exit(&sc->sc_slock);
 			return EINVAL;
+		}
 		sc->sc_bs_bmask = FSS_FSBSIZE(sc)-1;
 	} else {
 		sc->sc_bs_bshift = DEV_BSHIFT;
 		sc->sc_bs_bmask = FSS_FSBSIZE(sc)-1;
 	}
+	mutex_exit(&sc->sc_slock);
 
 	return 0;
 }
 
@@ -833,8 +867,10 @@
  */
 static int
 fss_create_snapshot(struct fss_softc *sc, struct fss_set *fss, struct lwp *l)
 {
+	struct mount *mount = sc->sc_mount;
+	struct vnode *vp;
 	int len, error;
 	u_int32_t csize;
 	off_t bsize;
 
@@ -845,15 +881,16 @@
 	 */
 	if ((error = fss_create_files(sc, fss, &bsize, l)) != 0)
 		goto bad;
 
+	mutex_enter(&sc->sc_slock);
 	if (sc->sc_flags & FSS_PERSISTENT) {
-		fss_softc_alloc(sc);
-		mutex_enter(&sc->sc_slock);
 		sc->sc_state = FSS_ACTIVE;
 		mutex_exit(&sc->sc_slock);
+		fss_softc_alloc(sc);
 		return 0;
 	}
+	mutex_exit(&sc->sc_slock);
 
 	/*
 	 * Set cluster size. Must be a power of two and
 	 * a multiple of backing store block size.
@@ -864,13 +901,15 @@
 		csize = fss->fss_csize;
 	if (bsize/csize > FSS_CLUSTER_MAX)
 		csize = bsize/FSS_CLUSTER_MAX+1;
 
+	mutex_enter(&sc->sc_slock);
 	for (sc->sc_clshift = sc->sc_bs_bshift; sc->sc_clshift < 32;
 	    sc->sc_clshift++)
 		if (FSS_CLSIZE(sc) >= csize)
 			break;
 	if (sc->sc_clshift >= 32) {
+		mutex_exit(&sc->sc_slock);
 		error = EINVAL;
 		goto bad;
 	}
 	sc->sc_clmask = FSS_CLSIZE(sc)-1;
@@ -898,52 +937,67 @@
 	sc->sc_indir_size = FSS_BTOCL(sc, len)+1;
 	sc->sc_clnext = sc->sc_indir_size;
 	sc->sc_indir_cur = 0;
 
-	if ((error = fss_softc_alloc(sc)) != 0)
+	mutex_exit(&sc->sc_slock);
+
+	if ((error = fss_softc_alloc(sc)) != 0) {
 		goto bad;
+	}
 
 	/*
 	 * Activate the snapshot.
 	 */
 
-	if ((error = vfs_suspend(sc->sc_mount, 0)) != 0)
+	if ((error = vfs_suspend(mount, 0)) != 0)
 		goto bad;
 
+	mutex_enter(&sc->sc_slock);
 	microtime(&sc->sc_time);
+	mutex_exit(&sc->sc_slock);
 
-	vrele_flush(sc->sc_mount);
-	error = VFS_SYNC(sc->sc_mount, MNT_WAIT, curlwp->l_cred);
-	if (error == 0)
-		error = fscow_establish(sc->sc_mount, fss_copy_on_write, sc);
+	vrele_flush(mount);
+	error = VFS_SYNC(mount, MNT_WAIT, curlwp->l_cred);
+	if (error == 0) 
+		error = fscow_establish(mount, fss_copy_on_write, sc);
 	if (error == 0) {
 		mutex_enter(&sc->sc_slock);
 		sc->sc_state = FSS_ACTIVE;
 		mutex_exit(&sc->sc_slock);
 	}
 
-	vfs_resume(sc->sc_mount);
+	vfs_resume(mount);
 
 	if (error != 0)
 		goto bad;
 
+	mutex_enter(&sc->sc_slock);
 	aprint_debug_dev(sc->sc_dev, "%s snapshot active\n", sc->sc_mntname);
 	aprint_debug_dev(sc->sc_dev,
 	    "%u clusters of %u, %u cache slots, %u indir clusters\n",
 	    sc->sc_clcount, FSS_CLSIZE(sc),
 	    sc->sc_cache_size, sc->sc_indir_size);
+	mutex_exit(&sc->sc_slock);
 
 	return 0;
 
 bad:
 	fss_softc_free(sc);
-	if (sc->sc_bs_vp != NULL) {
-		if (sc->sc_flags & FSS_PERSISTENT)
-			vrele(sc->sc_bs_vp);
-		else
-			vn_close(sc->sc_bs_vp, FREAD|FWRITE, l->l_cred);
+
+	mutex_enter(&sc->sc_slock);
+	if ((vp = sc->sc_bs_vp) != NULL) {
+		if (sc->sc_flags & FSS_PERSISTENT) {
+			mutex_exit(&sc->sc_slock);
+			vrele(vp);
+		} else {
+			mutex_exit(&sc->sc_slock);
+			vn_close(vp, FREAD|FWRITE, l->l_cred);
+		}
 	}
+
+	mutex_enter(&sc->sc_slock);
 	sc->sc_bs_vp = NULL;
+	mutex_exit(&sc->sc_slock);
 
 	return error;
 }
 
@@ -1390,23 +1444,26 @@
 
 		error = devsw_attach(fss_cd.cd_name,
 		    &fss_bdevsw, &fss_bmajor, &fss_cdevsw, &fss_cmajor);
 		if (error) {
+			cv_destroy(&fss_device_cv);
 			mutex_destroy(&fss_device_lock);
 			break;
 		}
 
 		error = config_cfdriver_attach(&fss_cd);
 		if (error) {
 			devsw_detach(&fss_bdevsw, &fss_cdevsw);
+			cv_destroy(&fss_device_cv);
 			mutex_destroy(&fss_device_lock);
 			break;
 		}
 
 		error = config_cfattach_attach(fss_cd.cd_name, &fss_ca);
 		if (error) {
 			config_cfdriver_detach(&fss_cd);
 			devsw_detach(&fss_bdevsw, &fss_cdevsw);
+			cv_destroy(&fss_device_cv);
 			mutex_destroy(&fss_device_lock);
 			break;
 		}
 


Home | Main Index | Thread Index | Old Index