Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/sys/dev/sdmmc Fix races in sdmmc tasks and teach ld@sdmmc to...



details:   https://anonhg.NetBSD.org/src/rev/4d053f12cb43
branches:  trunk
changeset: 1010384:4d053f12cb43
user:      riastradh <riastradh%NetBSD.org@localhost>
date:      Sun May 24 17:26:18 2020 +0000

description:
Fix races in sdmmc tasks and teach ld@sdmmc to abort xfers on detach.

- Teach sdmmc_add_task to queue it only if not already queued.
- Remove now-redundant logic to avoid repeated queueing elsewhere.
- Teach sdmmc_del_task to wait until task has completed.
- Call sdmmc_del_task in various needful places.
- Replace abuse of pcq by a lock and a tailq.
  (pcq is multi-producer, _single_-consumer, but there are potentially
  multiple consumers here and really only one producer.)
- Teach ld_sdmmc to abort xfers on detach.
  (Mechanism is kinda kludgey but it'll do for now; any effort one is
  tempted to spend overhauling this should be spent overhauling sdmmc
  to support proper asynchronous commands.)
- Make sure ld_sdmmc_discard either returns failure or eventually calls
  ldenddiscard.

XXX Currently ld_sdmmc_detach aborts xfers _before_ ldbegindetach has
has committed to detaching or not.  This is currently necessary to
avoid a deadlock because ldbegindetach waits for xfers to drain --
which strikes me as wrong; ldbegindetach shouldn't wait for anything,
and should only make the decision to commit to detaching or not so
the caller can decide whether to abort xfers before we actually wait
for them in ldenddetach.

XXX pullup -- although this changes some kernel symbols (sdmmc_add_task
and sdmmc_del_task), it shouldn't affect any existing modules; the only
module that uses sdmmc is ld_sdmmc.kmod, which is `.if 0' in the build
so there shouldn't be any of them floating around.

diffstat:

 sys/dev/sdmmc/if_bwfm_sdio.c |   44 +-----
 sys/dev/sdmmc/ld_sdmmc.c     |  295 +++++++++++++++++++++++++++++++++++-------
 sys/dev/sdmmc/sdmmc.c        |   63 ++++++--
 sys/dev/sdmmc/sdmmc_io.c     |    9 +-
 sys/dev/sdmmc/sdmmcvar.h     |    8 +-
 5 files changed, 304 insertions(+), 115 deletions(-)

diffs (truncated from 764 to 300 lines):

diff -r 9ad569a7a7cf -r 4d053f12cb43 sys/dev/sdmmc/if_bwfm_sdio.c
--- a/sys/dev/sdmmc/if_bwfm_sdio.c      Sun May 24 15:35:39 2020 +0000
+++ b/sys/dev/sdmmc/if_bwfm_sdio.c      Sun May 24 17:26:18 2020 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: if_bwfm_sdio.c,v 1.15 2020/05/07 11:46:27 macallan Exp $ */
+/* $NetBSD: if_bwfm_sdio.c,v 1.16 2020/05/24 17:26:18 riastradh Exp $ */
 /* $OpenBSD: if_bwfm_sdio.c,v 1.1 2017/10/11 17:19:50 patrick Exp $ */
 /*
  * Copyright (c) 2010-2016 Broadcom Corporation
@@ -69,7 +69,6 @@
 struct bwfm_sdio_softc {
        struct bwfm_softc       sc_sc;
        kmutex_t                sc_lock;
-       kmutex_t                sc_intr_lock;
 
        bool                    sc_bwfm_attached;
 
@@ -361,10 +360,8 @@
 
        mutex_init(&sc->sc_lock, MUTEX_DEFAULT, IPL_NONE);
        cv_init(&sc->sc_rxctl_cv, "bwfmctl");
-       mutex_init(&sc->sc_intr_lock, MUTEX_DEFAULT, IPL_NONE);
 
        sdmmc_init_task(&sc->sc_task, bwfm_sdio_task, sc);
-       sc->sc_task_queued = false;
 
        sc->sc_bounce_size = 64 * 1024;
        sc->sc_bounce_buf = kmem_alloc(sc->sc_bounce_size, KM_SLEEP);
@@ -620,10 +617,11 @@
        if (sc->sc_bwfm_attached)
                bwfm_detach(&sc->sc_sc, flags);
 
+       sdmmc_del_task(sc->sc_sf[1]->sc, &sc->sc_task, NULL);
+
        kmem_free(sc->sc_sf, sc->sc_sf_size);
        kmem_free(sc->sc_bounce_buf, sc->sc_bounce_size);
 
-       mutex_destroy(&sc->sc_intr_lock);
        cv_destroy(&sc->sc_rxctl_cv);
        mutex_destroy(&sc->sc_lock);
 
@@ -1426,11 +1424,7 @@
 
        DPRINTF(("%s: %s\n", DEVNAME(sc), name));
 
-       mutex_enter(&sc->sc_intr_lock);
-       if (!sdmmc_task_pending(&sc->sc_task))
-               sdmmc_add_task(sc->sc_sf[1]->sc, &sc->sc_task);
-       sc->sc_task_queued = true;
-       mutex_exit(&sc->sc_intr_lock);
+       sdmmc_add_task(sc->sc_sf[1]->sc, &sc->sc_task);
        return 1;
 }
 
@@ -1444,33 +1438,13 @@
 bwfm_sdio_task(void *v)
 {
        struct bwfm_sdio_softc *sc = (void *)v;
+
+       mutex_enter(&sc->sc_lock);
+       bwfm_sdio_task1(sc);
 #ifdef BWFM_DEBUG
-       unsigned count = 0;
-#endif
-
-       mutex_enter(&sc->sc_intr_lock);
-       while (sc->sc_task_queued) {
-#ifdef BWFM_DEBUG
-               ++count;
+       bwfm_sdio_debug_console(sc);
 #endif
-               sc->sc_task_queued = false;
-               mutex_exit(&sc->sc_intr_lock);
-
-               mutex_enter(&sc->sc_lock);
-               bwfm_sdio_task1(sc);
-#ifdef BWFM_DEBUG
-               bwfm_sdio_debug_console(sc);
-#endif
-               mutex_exit(&sc->sc_lock);
-
-               mutex_enter(&sc->sc_intr_lock);
-       }
-       mutex_exit(&sc->sc_intr_lock);
-
-#ifdef BWFM_DEBUG
-       if (count > 1)
-               DPRINTF(("%s: finished %u tasks\n", DEVNAME(sc), count));
-#endif
+       mutex_exit(&sc->sc_lock);
 }
 
 static void
diff -r 9ad569a7a7cf -r 4d053f12cb43 sys/dev/sdmmc/ld_sdmmc.c
--- a/sys/dev/sdmmc/ld_sdmmc.c  Sun May 24 15:35:39 2020 +0000
+++ b/sys/dev/sdmmc/ld_sdmmc.c  Sun May 24 17:26:18 2020 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: ld_sdmmc.c,v 1.37 2019/10/28 06:31:39 mlelstv Exp $    */
+/*     $NetBSD: ld_sdmmc.c,v 1.38 2020/05/24 17:26:18 riastradh Exp $  */
 
 /*
  * Copyright (c) 2008 KIYOHARA Takashi
@@ -28,7 +28,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ld_sdmmc.c,v 1.37 2019/10/28 06:31:39 mlelstv Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ld_sdmmc.c,v 1.38 2020/05/24 17:26:18 riastradh Exp $");
 
 #ifdef _KERNEL_OPT
 #include "opt_sdmmc.h"
@@ -59,7 +59,7 @@
 #ifdef LD_SDMMC_DEBUG
 #define DPRINTF(s)     printf s
 #else
-#define DPRINTF(s)     /**/
+#define DPRINTF(s)     __nothing
 #endif
 
 #define        LD_SDMMC_IORETRIES      5       /* number of retries before giving up */
@@ -72,32 +72,37 @@
 
 struct ld_sdmmc_task {
        struct sdmmc_task task;
-
        struct ld_sdmmc_softc *task_sc;
 
        struct buf *task_bp;
        int task_retries; /* number of xfer retry */
        struct callout task_restart_ch;
 
-       kmutex_t task_lock;
-       kcondvar_t task_cv;
+       bool task_poll;
+       int *task_errorp;
 
-       uintptr_t task_data;
+       TAILQ_ENTRY(ld_sdmmc_task) task_entry;
 };
 
 struct ld_sdmmc_softc {
        struct ld_softc sc_ld;
        int sc_hwunit;
-
+       char *sc_typename;
        struct sdmmc_function *sc_sf;
-       struct ld_sdmmc_task sc_task[LD_SDMMC_MAXTASKCNT];
-       pcq_t *sc_freeq;
-       char *sc_typename;
+
+       kmutex_t sc_lock;
+       kcondvar_t sc_cv;
+       TAILQ_HEAD(, ld_sdmmc_task) sc_freeq;
+       TAILQ_HEAD(, ld_sdmmc_task) sc_xferq;
+       unsigned sc_busy;
+       bool sc_dying;
 
        struct evcnt sc_ev_discard;     /* discard counter */
        struct evcnt sc_ev_discarderr;  /* discard error counter */
        struct evcnt sc_ev_discardbusy; /* discard busy counter */
        struct evcnt sc_ev_cachesyncbusy; /* cache sync busy counter */
+
+       struct ld_sdmmc_task sc_task[LD_SDMMC_MAXTASKCNT];
 };
 
 static int ld_sdmmc_match(device_t, cfdata_t, void *);
@@ -117,6 +122,108 @@
 CFATTACH_DECL_NEW(ld_sdmmc, sizeof(struct ld_sdmmc_softc),
     ld_sdmmc_match, ld_sdmmc_attach, ld_sdmmc_detach, NULL);
 
+static struct ld_sdmmc_task *
+ld_sdmmc_task_get(struct ld_sdmmc_softc *sc)
+{
+       struct ld_sdmmc_task *task;
+
+       KASSERT(mutex_owned(&sc->sc_lock));
+
+       if (sc->sc_dying || (task = TAILQ_FIRST(&sc->sc_freeq)) == NULL)
+               return NULL;
+       TAILQ_REMOVE(&sc->sc_freeq, task, task_entry);
+       TAILQ_INSERT_TAIL(&sc->sc_xferq, task, task_entry);
+       KASSERT(task->task_bp == NULL);
+       KASSERT(task->task_errorp == NULL);
+
+       return task;
+}
+
+static void
+ld_sdmmc_task_put(struct ld_sdmmc_softc *sc, struct ld_sdmmc_task *task)
+{
+
+       KASSERT(mutex_owned(&sc->sc_lock));
+
+       TAILQ_REMOVE(&sc->sc_xferq, task, task_entry);
+       TAILQ_INSERT_TAIL(&sc->sc_freeq, task, task_entry);
+       task->task_bp = NULL;
+       task->task_errorp = NULL;
+}
+
+static void
+ld_sdmmc_task_cancel(struct ld_sdmmc_softc *sc, struct ld_sdmmc_task *task)
+{
+       struct buf *bp;
+       int *errorp;
+
+       KASSERT(mutex_owned(&sc->sc_lock));
+       KASSERT(sc->sc_dying);
+
+       /*
+        * Either the callout or the task may be pending, but not both.
+        * First, determine whether the callout is pending.
+        */
+       if (callout_pending(&task->task_restart_ch) ||
+           callout_invoking(&task->task_restart_ch)) {
+               /*
+                * The callout either is pending, or just started but
+                * is waiting for us to release the lock.  At this
+                * point, it will notice sc->sc_dying and give up, so
+                * just wait for it to complete and then we will
+                * release everything.
+                */
+               callout_halt(&task->task_restart_ch, &sc->sc_lock);
+       } else {
+               /*
+                * If the callout is running, it has just scheduled, so
+                * after we wait for the callout to finish running, the
+                * task is either pending or running.  If the task is
+                * already running, it will notice sc->sc_dying and
+                * give up; otherwise we have to release everything.
+                */
+               callout_halt(&task->task_restart_ch, &sc->sc_lock);
+               if (!sdmmc_del_task(sc->sc_sf->sc, &task->task, &sc->sc_lock))
+                       return; /* task already started, let it clean up */
+       }
+
+       /*
+        * It is our responsibility to clean up.  Move it from xferq
+        * back to freeq and make sure to notify anyone waiting that
+        * it's finished.
+        */
+       bp = task->task_bp;
+       errorp = task->task_errorp;
+       ld_sdmmc_task_put(sc, task);
+
+       /*
+        * If the task was for an asynchronous I/O xfer, fail the I/O
+        * xfer, with the softc lock dropped since this is a callback
+        * into arbitrary other subsystems.
+        */
+       if (bp) {
+               mutex_exit(&sc->sc_lock);
+               /*
+                * XXX We assume that the same sequence works for bio
+                * and discard -- that lddiscardend is just the same as
+                * setting bp->b_resid = bp->b_bcount in the event of
+                * error and then calling lddone.
+                */
+               bp->b_error = ENXIO;
+               bp->b_resid = bp->b_bcount;
+               lddone(&sc->sc_ld, bp);
+               mutex_enter(&sc->sc_lock);
+       }
+
+       /*
+        * If the task was for a synchronous operation (cachesync),
+        * then just set the error indicator and wake up the waiter.
+        */
+       if (errorp) {
+               *errorp = ENXIO;
+               cv_broadcast(&sc->sc_cv);
+       }
+}
 
 /* ARGSUSED */
 static int
@@ -157,15 +264,18 @@
        evcnt_attach_dynamic(&sc->sc_ev_discardbusy, EVCNT_TYPE_MISC,
            NULL, device_xname(self), "sdmmc discard busy");
 
+       mutex_init(&sc->sc_lock, MUTEX_DEFAULT, IPL_SDMMC);
+       cv_init(&sc->sc_cv, "ldsdmmc");
+       TAILQ_INIT(&sc->sc_freeq);
+       TAILQ_INIT(&sc->sc_xferq);
+       sc->sc_dying = false;
+
        const int ntask = __arraycount(sc->sc_task);
-       sc->sc_freeq = pcq_create(ntask, KM_SLEEP);
        for (i = 0; i < ntask; i++) {
                task = &sc->sc_task[i];
                task->task_sc = sc;
                callout_init(&task->task_restart_ch, CALLOUT_MPSAFE);
-               mutex_init(&task->task_lock, MUTEX_DEFAULT, IPL_NONE);
-               cv_init(&task->task_cv, "ldsdmmctask");
-               pcq_put(sc->sc_freeq, task);
+               TAILQ_INSERT_TAIL(&sc->sc_freeq, task, task_entry);
        }
 
        sc->sc_hwunit = 0;      /* always 0? */
@@ -231,19 +341,39 @@
 {



Home | Main Index | Thread Index | Old Index