tech-kern archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
vnd locking
There are a load of locking PRs involving vnd. I thought I would have a
stab at converting vnd to use condvars and mutexes (mutices?) as a first
step. I cobbled something together which allows me to
vnconfig vnd0 /tmp/foo.img
disklabel -r -w vnd0 vndtest
newfs /dev/vnd0b
mount /dev/vnd0b /mnt
cd /mnt
echo hello > foo
cat foo
df -h .
umount /mnt
with a LOCKDEBUG kernel, so I may have guessed right, but I haven't found
any documentation on The Right Way, so please review carefully, assume
the worst, and tell me how to do it better!
Things to ponder:
- there was some sort of start kernel thread, thread sets variable,
meanwhile loop waiting for variable to be set. I removed that. On
the other hand I added a MUSTJOIN for the kernel thread exiting
side of things.
- given that vnd is a pseudo-device, do I need an extra lock to care
of device creation/destruction? (What am I protecting against?
Cheers,
Patrick
Index: vnd.c
===================================================================
RCS file: /cvsroot/src/sys/dev/vnd.c,v
retrieving revision 1.243
diff -u -r1.243 vnd.c
--- vnd.c 26 Apr 2015 15:15:20 -0000 1.243
+++ vnd.c 22 May 2015 12:00:31 -0000
@@ -171,8 +171,6 @@
static void vndgetdefaultlabel(struct vnd_softc *, struct disklabel *);
static void vndgetdisklabel(dev_t, struct vnd_softc *);
-static int vndlock(struct vnd_softc *);
-static void vndunlock(struct vnd_softc *);
#ifdef VND_COMPRESSION
static void compstrategy(struct buf *, off_t);
static void *vnd_alloc(void *, u_int, u_int);
@@ -241,18 +239,13 @@
void
vndattach(int num)
{
- int error;
-
- error = config_cfattach_attach(vnd_cd.cd_name, &vnd_ca);
- if (error)
- aprint_error("%s: unable to register cfattach\n",
- vnd_cd.cd_name);
+ if (config_cfattach_attach(vnd_cd.cd_name, &vnd_ca))
+ aprint_error("%s: unable to register\n", vnd_cd.cd_name);
}
static int
vnd_match(device_t self, cfdata_t cfdata, void *aux)
{
-
return 1;
}
@@ -265,6 +258,10 @@
sc->sc_comp_offsets = NULL;
sc->sc_comp_buff = NULL;
sc->sc_comp_decombuf = NULL;
+ mutex_init(&sc->sc_sclock, MUTEX_DEFAULT, IPL_NONE);
+ cv_init(&sc->sc_activitycv, "vndac");
+ cv_init(&sc->sc_bufqcv, "vndbp");
+ cv_init(&sc->sc_pendingcv, "vndpc");
bufq_alloc(&sc->sc_tab, "disksort", BUFQ_SORT_RAWBLOCK);
disk_init(&sc->sc_dkdev, device_xname(self), &vnddkdriver);
if (!pmf_device_register(self, NULL, NULL))
@@ -284,6 +281,10 @@
}
pmf_device_deregister(self);
+ mutex_destroy(&sc->sc_sclock);
+ cv_destroy(&sc->sc_activitycv);
+ cv_destroy(&sc->sc_bufqcv);
+ cv_destroy(&sc->sc_pendingcv);
bufq_free(sc->sc_tab);
disk_destroy(&sc->sc_dkdev);
@@ -340,8 +341,7 @@
sc->sc_flags = VNF_KLABEL;
}
- if ((error = vndlock(sc)) != 0)
- return error;
+ mutex_enter(&sc->sc_sclock);
if ((sc->sc_flags & VNF_CLEARING) != 0) {
error = ENXIO;
@@ -374,7 +374,9 @@
*/
if ((sc->sc_flags & VNF_VLABEL) == 0) {
sc->sc_flags |= VNF_VLABEL;
+ mutex_exit(&sc->sc_sclock);
vndgetdisklabel(dev, sc);
+ mutex_enter(&sc->sc_sclock);
}
}
}
@@ -403,7 +405,7 @@
sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask;
done:
- vndunlock(sc);
+ mutex_exit(&sc->sc_sclock);
return error;
}
@@ -422,8 +424,7 @@
if (sc == NULL)
return ENXIO;
- if ((error = vndlock(sc)) != 0)
- return error;
+ mutex_enter(&sc->sc_sclock);
part = DISKPART(dev);
@@ -446,7 +447,7 @@
sc->sc_flags &= ~VNF_VLABEL;
}
- vndunlock(sc);
+ mutex_exit(&sc->sc_sclock);
if ((sc->sc_flags & VNF_INITED) == 0) {
if ((error = vnd_destroy(sc->sc_dev)) != 0) {
@@ -470,7 +471,8 @@
device_lookup_private(&vnd_cd, unit);
struct disklabel *lp;
daddr_t blkno;
- int s = splbio();
+
+ mutex_enter(&vnd->sc_sclock);
if (vnd == NULL) {
bp->b_error = ENXIO;
@@ -545,18 +547,20 @@
KASSERT(vnd->sc_pending >= 0 &&
vnd->sc_pending <= VND_MAXPENDING(vnd));
while (vnd->sc_pending == VND_MAXPENDING(vnd))
- tsleep(&vnd->sc_pending, PRIBIO, "vndpc", 0);
+ cv_wait(&vnd->sc_pendingcv, &vnd->sc_sclock);
vnd->sc_pending++;
}
+ mutex_exit(&vnd->sc_sclock);
bufq_put(vnd->sc_tab, bp);
- wakeup(&vnd->sc_tab);
- splx(s);
+ mutex_enter(&vnd->sc_sclock);
+ cv_signal(&vnd->sc_bufqcv);
+ mutex_exit(&vnd->sc_sclock);
return;
done:
+ mutex_exit(&vnd->sc_sclock);
bp->b_resid = bp->b_bcount;
biodone(bp);
- splx(s);
}
static bool
@@ -601,7 +605,8 @@
vndthread(void *arg)
{
struct vnd_softc *vnd = arg;
- int s;
+
+ mutex_enter(&vnd->sc_sclock);
/* Determine whether we can *use* VOP_BMAP and VOP_STRATEGY to
* directly access the backing vnode. If we can, use these two
@@ -620,31 +625,27 @@
"using read/write operations");
#endif
- s = splbio();
- vnd->sc_flags |= VNF_KTHREAD;
- wakeup(&vnd->sc_kthread);
-
/*
* Dequeue requests and serve them depending on the available
* vnode operations.
*/
- while ((vnd->sc_flags & VNF_VUNCONF) == 0) {
+ while ((vnd->sc_flags & VNF_KTHREAD) != 0) {
struct vndxfer *vnx;
struct buf *obp;
struct buf *bp;
obp = bufq_get(vnd->sc_tab);
if (obp == NULL) {
- tsleep(&vnd->sc_tab, PRIBIO, "vndbp", 0);
+ cv_wait(&vnd->sc_bufqcv, &vnd->sc_sclock);
continue;
};
if ((vnd->sc_flags & VNF_USE_VN_RDWR)) {
KASSERT(vnd->sc_pending > 0 &&
vnd->sc_pending <= VND_MAXPENDING(vnd));
if (vnd->sc_pending-- == VND_MAXPENDING(vnd))
- wakeup(&vnd->sc_pending);
+ cv_signal(&vnd->sc_pendingcv);
}
- splx(s);
+ mutex_exit(&vnd->sc_sclock);
#ifdef DEBUG
if (vnddebug & VDB_FOLLOW)
printf("vndthread(%p)\n", obp);
@@ -658,7 +659,7 @@
/* handle a compressed read */
if ((obp->b_flags & B_READ) != 0 && (vnd->sc_flags & VNF_COMP)) {
off_t bn;
-
+
/* Convert to a byte offset within the file. */
bn = obp->b_rawblkno *
vnd->sc_dkdev.dk_label->d_secsize;
@@ -667,22 +668,22 @@
goto done;
}
#endif /* VND_COMPRESSION */
-
+
/*
* Allocate a header for this transfer and link it to the
* buffer
*/
- s = splbio();
- vnx = VND_GETXFER(vnd);
- splx(s);
+ mutex_enter(&vnd->sc_sclock);
+ vnx = VND_GETXFER(vnd); /* may sleep */
+ mutex_exit(&vnd->sc_sclock);
vnx->vx_vnd = vnd;
- s = splbio();
+ mutex_enter(&vnd->sc_sclock);
while (vnd->sc_active >= vnd->sc_maxactive) {
- tsleep(&vnd->sc_tab, PRIBIO, "vndac", 0);
+ cv_wait(&vnd->sc_activitycv, &vnd->sc_sclock);
}
vnd->sc_active++;
- splx(s);
+ mutex_exit(&vnd->sc_sclock);
/* Instrumentation. */
disk_busy(&vnd->sc_dkdev);
@@ -706,17 +707,15 @@
else
handle_with_rdwr(vnd, obp, bp);
- s = splbio();
+ mutex_enter(&vnd->sc_sclock);
continue;
done:
biodone(obp);
- s = splbio();
+ mutex_enter(&vnd->sc_sclock);
}
- vnd->sc_flags &= (~VNF_KTHREAD | VNF_VUNCONF);
- wakeup(&vnd->sc_kthread);
- splx(s);
+ mutex_exit(&vnd->sc_sclock);
kthread_exit(0);
}
@@ -794,7 +793,7 @@
}
/*
- * Handes the read/write request given in 'bp' using the vnode's VOP_BMAP
+ * Handles the read/write request given in 'bp' using the vnode's VOP_BMAP
* and VOP_STRATEGY operations.
*
* 'obp' is a pointer to the original request fed to the vnd device.
@@ -896,7 +895,7 @@
* fsync won't wait for this write which
* has no chance to complete before all nested bufs
* have been queued. But it has to be done
- * before the last VOP_STRATEGY()
+ * before the last VOP_STRATEGY()
* or the call to nestiobuf_done().
*/
w_vp = bp->b_vp;
@@ -905,9 +904,9 @@
mutex_exit(w_vp->v_interlock);
}
KASSERT(skipped != 0 || nbp != NULL);
- if (skipped)
+ if (skipped)
nestiobuf_done(bp, skipped, error);
- else
+ else
VOP_STRATEGY(vp, nbp);
}
@@ -917,7 +916,8 @@
struct vndxfer *vnx = VND_BUFTOXFER(bp);
struct vnd_softc *vnd = vnx->vx_vnd;
struct buf *obp = bp->b_private;
- int s = splbio();
+
+ mutex_enter(&vnd->sc_sclock);
KASSERT(&vnx->vx_buf == bp);
KASSERT(vnd->sc_active > 0);
@@ -931,9 +931,9 @@
(bp->b_flags & B_READ));
vnd->sc_active--;
if (vnd->sc_active == 0) {
- wakeup(&vnd->sc_tab);
+ cv_broadcast(&vnd->sc_activitycv);
}
- splx(s);
+ mutex_exit(&vnd->sc_sclock);
obp->b_error = bp->b_error;
obp->b_resid = bp->b_resid;
buf_destroy(bp);
@@ -1012,10 +1012,7 @@
static int
vnddoclear(struct vnd_softc *vnd, int pmask, int minor, bool force)
{
- int error;
-
- if ((error = vndlock(vnd)) != 0)
- return error;
+ mutex_enter(&vnd->sc_sclock);
/*
* Don't unconfigure if any other partitions are open
@@ -1023,7 +1020,7 @@
* partition are open.
*/
if (DK_BUSY(vnd, pmask) && !force) {
- vndunlock(vnd);
+ mutex_exit(&vnd->sc_sclock);
return EBUSY;
}
@@ -1035,10 +1032,10 @@
* release lock to avoid recursion
*
* Set VNF_CLEARING to prevent vndopen() from
- * sneaking in after we vndunlock().
+ * sneaking in after we mutex_exit().
*/
vnd->sc_flags |= VNF_CLEARING;
- vndunlock(vnd);
+ mutex_exit(&vnd->sc_sclock);
vndclear(vnd, minor);
#ifdef DEBUG
if (vnddebug & VDB_INIT)
@@ -1146,8 +1143,7 @@
if (vnd->sc_flags & VNF_INITED)
return EBUSY;
- if ((error = vndlock(vnd)) != 0)
- return error;
+ mutex_enter(&vnd->sc_sclock);
fflags = FREAD;
if ((vio->vnd_flags & VNDIOF_READONLY) == 0)
@@ -1163,7 +1159,7 @@
}
KASSERT(l);
error = VOP_GETATTR(nd.ni_vp, &vattr, l->l_cred);
- if (!error && nd.ni_vp->v_type != VREG)
+ if (!error && vattr.va_type != VREG)
error = EOPNOTSUPP;
if (!error && vattr.va_bytes < vattr.va_size)
/* File is definitely sparse, use vn_rdwr() */
@@ -1181,11 +1177,11 @@
int i;
u_int32_t comp_size;
u_int32_t comp_maxsize;
-
+
/* allocate space for compresed file header */
ch = malloc(sizeof(struct vnd_comp_header),
M_TEMP, M_WAITOK);
-
+
/* read compressed file header */
error = vn_rdwr(UIO_READ, nd.ni_vp, (void *)ch,
sizeof(struct vnd_comp_header), 0, UIO_SYSSPACE,
@@ -1195,7 +1191,7 @@
VOP_UNLOCK(nd.ni_vp);
goto close_and_exit;
}
-
+
/* save some header info */
vnd->sc_comp_blksz = ntohl(ch->block_size);
/* note last offset is the file byte size */
@@ -1214,17 +1210,17 @@
error = EINVAL;
goto close_and_exit;
}
-
+
/* set decompressed file size */
vattr.va_size =
((u_quad_t)vnd->sc_comp_numoffs - 1) *
(u_quad_t)vnd->sc_comp_blksz;
-
+
/* allocate space for all the compressed offsets */
vnd->sc_comp_offsets =
malloc(sizeof(u_int64_t) * vnd->sc_comp_numoffs,
M_DEVBUF, M_WAITOK);
-
+
/* read in the offsets */
error = vn_rdwr(UIO_READ, nd.ni_vp,
(void *)vnd->sc_comp_offsets,
@@ -1250,16 +1246,16 @@
}
vnd->sc_comp_offsets[vnd->sc_comp_numoffs - 1] =
be64toh(vnd->sc_comp_offsets[vnd->sc_comp_numoffs - 1]);
-
+
/* create compressed data buffer */
vnd->sc_comp_buff = malloc(comp_maxsize,
M_DEVBUF, M_WAITOK);
-
+
/* create decompressed buffer */
vnd->sc_comp_decombuf = malloc(vnd->sc_comp_blksz,
M_DEVBUF, M_WAITOK);
vnd->sc_comp_buffblk = -1;
-
+
/* Initialize decompress stream */
memset(&vnd->sc_comp_stream, 0, sizeof(z_stream));
vnd->sc_comp_stream.zalloc = vnd_alloc;
@@ -1273,7 +1269,7 @@
error = EINVAL;
goto close_and_exit;
}
-
+
vnd->sc_flags |= VNF_COMP | VNF_READONLY;
#else /* !VND_COMPRESSION */
VOP_UNLOCK(nd.ni_vp);
@@ -1281,7 +1277,7 @@
goto close_and_exit;
#endif /* VND_COMPRESSION */
}
-
+
VOP_UNLOCK(nd.ni_vp);
vnd->sc_vp = nd.ni_vp;
vnd->sc_size = btodb(vattr.va_size); /* note truncation */
@@ -1359,13 +1355,13 @@
vio->vnd_size = dbtob(vnd->sc_size);
vnd->sc_flags |= VNF_INITED;
- /* create the kernel thread, wait for it to be up */
- error = kthread_create(PRI_NONE, 0, NULL, vndthread, vnd,
- &vnd->sc_kthread, "%s", device_xname(vnd->sc_dev));
- if (error)
+ vnd->sc_flags |= VNF_KTHREAD;
+ error = kthread_create(PRI_BIO, KTHREAD_MUSTJOIN, NULL,
+ vndthread, vnd, &vnd->sc_kthread, "%s",
+ device_xname(vnd->sc_dev));
+ if (error) {
+ vnd->sc_flags &= ~VNF_KTHREAD;
goto close_and_exit;
- while ((vnd->sc_flags & VNF_KTHREAD) == 0) {
- tsleep(&vnd->sc_kthread, PRIBIO, "vndthr", 0);
}
#ifdef DEBUG
if (vnddebug & VDB_INIT)
@@ -1384,7 +1380,7 @@
pool_init(&vnd->sc_vxpool, sizeof(struct vndxfer), 0,
0, 0, "vndxpl", NULL, IPL_BIO);
- vndunlock(vnd);
+ mutex_exit(&vnd->sc_sclock);
pathbuf_destroy(pb);
@@ -1412,7 +1408,7 @@
vnd->sc_comp_decombuf = NULL;
}
#endif /* VND_COMPRESSION */
- vndunlock(vnd);
+ mutex_exit(&vnd->sc_sclock);
return error;
#ifdef VNDIOCCLR50
@@ -1504,8 +1500,8 @@
{
struct disklabel *lp;
- if ((error = vndlock(vnd)) != 0)
- return error;
+ /* XXX ? mutex_enter(&vnd->sc_sclock); */
+ /* Mustn't be locked for writedisklabel */
vnd->sc_flags |= VNF_LABELLING;
@@ -1533,7 +1529,7 @@
vnd->sc_flags &= ~VNF_LABELLING;
- vndunlock(vnd);
+ /* XXX ? mutex_exit(&vnd->sc_sclock); */
if (error)
return error;
@@ -1660,7 +1656,6 @@
struct vnode *vp = vnd->sc_vp;
int fflags = FREAD;
int bmaj, cmaj, i, mn;
- int s;
#ifdef DEBUG
if (vnddebug & VDB_FOLLOW)
@@ -1678,17 +1673,24 @@
vdevgone(cmaj, mn, mn, VCHR);
}
+ mutex_enter(&vnd->sc_sclock);
+
if ((vnd->sc_flags & VNF_READONLY) == 0)
fflags |= FWRITE;
- s = splbio();
bufq_drain(vnd->sc_tab);
- splx(s);
+ cv_broadcast(&vnd->sc_bufqcv);
- vnd->sc_flags |= VNF_VUNCONF;
- wakeup(&vnd->sc_tab);
- while (vnd->sc_flags & VNF_KTHREAD)
- tsleep(&vnd->sc_kthread, PRIBIO, "vnthr", 0);
+ mutex_exit(&vnd->sc_sclock);
+
+ if (vnd->sc_flags & VNF_KTHREAD) {
+ mutex_enter(&vnd->sc_sclock);
+ vnd->sc_flags &= ~VNF_KTHREAD;
+ cv_signal(&vnd->sc_bufqcv);
+ cv_signal(&vnd->sc_activitycv);
+ mutex_exit(&vnd->sc_sclock);
+ kthread_join(vnd->sc_kthread);
+ }
#ifdef VND_COMPRESSION
/* free the compressed file buffers */
@@ -1709,7 +1711,7 @@
#endif /* VND_COMPRESSION */
vnd->sc_flags &=
~(VNF_INITED | VNF_READONLY | VNF_KLABEL | VNF_VLABEL
- | VNF_VUNCONF | VNF_COMP | VNF_CLEARING);
+ | VNF_COMP | VNF_CLEARING);
if (vp == NULL)
panic("vndclear: null vp");
(void) vn_close(vp, fflags, vnd->sc_cred);
@@ -1853,40 +1855,6 @@
}
}
-/*
- * Wait interruptibly for an exclusive lock.
- *
- * XXX
- * Several drivers do this; it should be abstracted and made MP-safe.
- */
-static int
-vndlock(struct vnd_softc *sc)
-{
- int error;
-
- while ((sc->sc_flags & VNF_LOCKED) != 0) {
- sc->sc_flags |= VNF_WANTED;
- if ((error = tsleep(sc, PRIBIO | PCATCH, "vndlck", 0)) != 0)
- return error;
- }
- sc->sc_flags |= VNF_LOCKED;
- return 0;
-}
-
-/*
- * Unlock and wake up any waiters.
- */
-static void
-vndunlock(struct vnd_softc *sc)
-{
-
- sc->sc_flags &= ~VNF_LOCKED;
- if ((sc->sc_flags & VNF_WANTED) != 0) {
- sc->sc_flags &= ~VNF_WANTED;
- wakeup(sc);
- }
-}
-
#ifdef VND_COMPRESSION
/* compressed file read */
static void
@@ -2041,7 +2009,7 @@
vnd_modcmd(modcmd_t cmd, void *arg)
{
int bmajor = -1, cmajor = -1, error = 0;
-
+
switch (cmd) {
case MODULE_CMD_INIT:
error = config_cfdriver_attach(&vnd_cd);
@@ -2051,11 +2019,11 @@
error = config_cfattach_attach(vnd_cd.cd_name, &vnd_ca);
if (error) {
config_cfdriver_detach(&vnd_cd);
- aprint_error("%s: unable to register cfattach\n",
+ aprint_error("%s: unable to register\n",
vnd_cd.cd_name);
break;
}
-
+
error = devsw_attach("vnd", &vnd_bdevsw, &bmajor,
&vnd_cdevsw, &cmajor);
if (error) {
@@ -2063,7 +2031,7 @@
config_cfdriver_detach(&vnd_cd);
break;
}
-
+
break;
case MODULE_CMD_FINI:
Index: vndvar.h
===================================================================
RCS file: /cvsroot/src/sys/dev/vndvar.h,v
retrieving revision 1.33
diff -u -r1.33 vndvar.h
--- vndvar.h 3 Jun 2013 16:42:32 -0000 1.33
+++ vndvar.h 22 May 2015 12:00:32 -0000
@@ -106,15 +106,19 @@
* A vnode disk's state information.
*/
struct vnd_softc {
- device_t sc_dev;
+ device_t sc_dev;
+ kmutex_t sc_sclock; /* protect this softc */
int sc_flags; /* flags */
uint64_t sc_size; /* size of vnd */
struct vnode *sc_vp; /* vnode */
kauth_cred_t sc_cred; /* credentials */
- int sc_maxactive; /* max # of active requests */
struct bufq_state *sc_tab; /* transfer queue */
+ kcondvar_t sc_bufqcv; /* queue's condvar */
int sc_pending; /* number of pending transfers */
+ kcondvar_t sc_pendingcv; /* pending transfer condvar */
+ int sc_maxactive; /* max # of active requests */
int sc_active; /* number of active transfers */
+ kcondvar_t sc_activitycv; /* activity condvar */
struct disk sc_dkdev; /* generic disk device info */
struct vndgeom sc_geom; /* virtual geometry */
struct pool sc_vxpool; /* vndxfer pool */
@@ -134,13 +138,10 @@
#define VNF_INITED 0x001 /* unit has been initialized */
#define VNF_WLABEL 0x002 /* label area is writable */
#define VNF_LABELLING 0x004 /* unit is currently being labelled */
-#define VNF_WANTED 0x008 /* someone is waiting to obtain a lock */
-#define VNF_LOCKED 0x010 /* unit is locked */
#define VNF_READONLY 0x020 /* unit is read-only */
#define VNF_KLABEL 0x040 /* keep label on close */
#define VNF_VLABEL 0x080 /* label is valid */
#define VNF_KTHREAD 0x100 /* thread is running */
-#define VNF_VUNCONF 0x200 /* device is unconfiguring */
#define VNF_COMP 0x400 /* file is compressed */
#define VNF_CLEARING 0x800 /* unit is being torn down */
#define VNF_USE_VN_RDWR 0x1000 /* have to use vn_rdwr() */
Home |
Main Index |
Thread Index |
Old Index