NetBSD-Bugs archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Re: kern/58869: ipmi(4) holds sc_cmd_mtx across copyout, needed for wdog tickle



The attached patch series attempts to address this as proposed (with
some other cleanups along the way).

Also attached is a single giant diff with all the changes combined.

(I don't have time to test this myself so I don't plan to commit it
at the moment unless someone else wants to test it.)

This is not likely to affect Edgar's get_sdr_partial error at boot.
What it does prevent, if it works, is spurious watchdog reset because
tickling has been held up, or delayed envstat(8) updates, while an
ipmitool process is swapping in or out.
# HG changeset patch
# User Taylor R Campbell <riastradh%NetBSD.org@localhost>
# Date 1733238903 0
#      Tue Dec 03 15:15:03 2024 +0000
# Branch trunk
# Node ID a9fd9dbfe4d7587b99b818176e451b7950052e64
# Parent  eab5d84e73c8103a6990693dd75b2cdba777472b
# EXP-Topic riastradh-pr56568-ipmidelay
ipmi(4): Omit needless condvar/mutex for sleep.

Just use kpause instead.

Cleanup in preparation for:

PR kern/58869: ipmi(4) holds sc_cmd_mtx across copyout, needed for
wdog tickle

diff --git a/sys/dev/ipmi.c b/sys/dev/ipmi.c
--- a/sys/dev/ipmi.c
+++ b/sys/dev/ipmi.c
@@ -355,9 +355,7 @@ bmc_io_wait_sleep(struct ipmi_softc *sc,
 		v = bmc_read(sc, offset);
 		if ((v & mask) == value)
 			return v;
-		mutex_enter(&sc->sc_sleep_mtx);
-		cv_timedwait(&sc->sc_cmd_sleep, &sc->sc_sleep_mtx, 1);
-		mutex_exit(&sc->sc_sleep_mtx);
+		kpause("ipmicmd", /*intr*/false, /*timo*/1, /*mtx*/NULL);
 	}
 	return -1;
 }
@@ -1096,9 +1094,7 @@ ipmi_delay(struct ipmi_softc *sc, int ms
 		delay(ms * 1000);
 		return;
 	}
-	mutex_enter(&sc->sc_sleep_mtx);
-	cv_timedwait(&sc->sc_cmd_sleep, &sc->sc_sleep_mtx, mstohz(ms));
-	mutex_exit(&sc->sc_sleep_mtx);
+	kpause("ipmicmd", /*intr*/false, /*timo*/mstohz(ms), /*mtx*/NULL);
 }
 
 /* Read a partial SDR entry */
@@ -1967,12 +1963,10 @@ ipmi_match(device_t parent, cfdata_t cf,
 	sc.sc_if->probe(&sc);
 
 	mutex_init(&sc.sc_cmd_mtx, MUTEX_DEFAULT, IPL_SOFTCLOCK);
-	cv_init(&sc.sc_cmd_sleep, "ipmimtch");
 
 	if (ipmi_get_device_id(&sc, NULL) == 0)
 		rv = 1;
 
-	cv_destroy(&sc.sc_cmd_sleep);
 	mutex_destroy(&sc.sc_cmd_mtx);
 	ipmi_unmap_regs(&sc);
 
@@ -2150,8 +2144,6 @@ ipmi_attach(device_t parent, device_t se
 
 	/* lock around read_sensor so that no one messes with the bmc regs */
 	mutex_init(&sc->sc_cmd_mtx, MUTEX_DEFAULT, IPL_SOFTCLOCK);
-	mutex_init(&sc->sc_sleep_mtx, MUTEX_DEFAULT, IPL_SOFTCLOCK);
-	cv_init(&sc->sc_cmd_sleep, "ipmicmd");
 
 	mutex_init(&sc->sc_poll_mtx, MUTEX_DEFAULT, IPL_SOFTCLOCK);
 	cv_init(&sc->sc_poll_cv, "ipmipoll");
@@ -2215,8 +2207,6 @@ ipmi_detach(device_t self, int flags)
 	cv_destroy(&sc->sc_mode_cv);
 	cv_destroy(&sc->sc_poll_cv);
 	mutex_destroy(&sc->sc_poll_mtx);
-	cv_destroy(&sc->sc_cmd_sleep);
-	mutex_destroy(&sc->sc_sleep_mtx);
 	mutex_destroy(&sc->sc_cmd_mtx);
 
 	return 0;
diff --git a/sys/dev/ipmivar.h b/sys/dev/ipmivar.h
--- a/sys/dev/ipmivar.h
+++ b/sys/dev/ipmivar.h
@@ -95,8 +95,6 @@ struct ipmi_softc {
 	kcondvar_t		sc_mode_cv;
 
 	kmutex_t		sc_cmd_mtx;
-	kmutex_t		sc_sleep_mtx;
-	kcondvar_t		sc_cmd_sleep;
 
 	struct ipmi_bmc_args	*sc_iowait_args;
 
# HG changeset patch
# User Taylor R Campbell <riastradh%NetBSD.org@localhost>
# Date 1733239308 0
#      Tue Dec 03 15:21:48 2024 +0000
# Branch trunk
# Node ID acc525c4da4e26d67e193c33d922799554fed543
# Parent  a9fd9dbfe4d7587b99b818176e451b7950052e64
# EXP-Topic riastradh-pr56568-ipmidelay
sys/ipmi.h: Tidy header file.

Need <sys/ioccom.h> for _IOWR/_IOW/_IOR.  Nix trailing whitespace.

Cleanup in preparation for:

PR kern/58869: ipmi(4) holds sc_cmd_mtx across copyout, needed for
wdog tickle

diff --git a/sys/sys/ipmi.h b/sys/sys/ipmi.h
--- a/sys/sys/ipmi.h
+++ b/sys/sys/ipmi.h
@@ -32,6 +32,8 @@
 #ifndef _SYS_IPMI_H_
 #define	_SYS_IPMI_H_
 
+#include <sys/ioccom.h>
+
 #define	IPMI_MAX_ADDR_SIZE		0x20
 #define IPMI_MAX_RX			1024
 #define	IPMI_BMC_SLAVE_ADDR		0x20
@@ -99,7 +101,7 @@ struct ipmi_ipmb_addr {
 #define IPMICTL_SET_GETS_EVENTS_CMD     _IOW('i', 16, int)
 #define IPMICTL_SET_MY_ADDRESS_CMD      _IOW('i', 17, unsigned int)
 #define IPMICTL_GET_MY_ADDRESS_CMD      _IOR('i', 18, unsigned int)
-#define IPMICTL_SET_MY_LUN_CMD          _IOW('i', 19, unsigned int) 
+#define IPMICTL_SET_MY_LUN_CMD          _IOW('i', 19, unsigned int)
 #define IPMICTL_GET_MY_LUN_CMD          _IOR('i', 20, unsigned int)
 
 #endif /* !_SYS_IPMI_H_ */
# HG changeset patch
# User Taylor R Campbell <riastradh%NetBSD.org@localhost>
# Date 1733239370 0
#      Tue Dec 03 15:22:50 2024 +0000
# Branch trunk
# Node ID 4393453be9263ecde5a6fa6ac64c76b1bfc5490e
# Parent  acc525c4da4e26d67e193c33d922799554fed543
# EXP-Topic riastradh-pr56568-ipmidelay
ipmivar.h: Tidy includes and use a named constant, not a comment.

Cleanup in preparation for:

PR kern/58869: ipmi(4) holds sc_cmd_mtx across copyout, needed for
wdog tickle

diff --git a/sys/dev/ipmivar.h b/sys/dev/ipmivar.h
--- a/sys/dev/ipmivar.h
+++ b/sys/dev/ipmivar.h
@@ -24,17 +24,20 @@
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
- *
  */
 
-#include <sys/mutex.h>
-#include <sys/condvar.h>
-
-#include <dev/sysmon/sysmonvar.h>
-
 #ifndef _IPMIVAR_H_
 #define _IPMIVAR_H_
 
+#include <sys/types.h>
+
+#include <sys/bus.h>
+#include <sys/condvar.h>
+#include <sys/ipmi.h>
+#include <sys/mutex.h>
+
+#include <dev/sysmon/sysmonvar.h>
+
 #define IPMI_IF_KCS		1
 #define IPMI_IF_SMIC		2
 #define IPMI_IF_BT		3
@@ -107,7 +110,7 @@ struct ipmi_softc {
 	envsys_data_t		*sc_sensor;
 	int 		sc_nsensors; /* total number of sensors */
 
-	char		sc_buf[1024 + 3]; /* IPMI_MAX_RX + 3 */
+	char		sc_buf[IPMI_MAX_RX + 3];
 	bool		sc_buf_rsvd;
 
 	/* request busy */
# HG changeset patch
# User Taylor R Campbell <riastradh%NetBSD.org@localhost>
# Date 1733242069 0
#      Tue Dec 03 16:07:49 2024 +0000
# Branch trunk
# Node ID 9e9ab94042dd69884b43d2a67b12adf8013f7972
# Parent  4393453be9263ecde5a6fa6ac64c76b1bfc5490e
# EXP-Topic riastradh-pr56568-ipmidelay
ipmi(4): Nix trailing whitespace.

No functional change intended.

diff --git a/sys/dev/ipmi.c b/sys/dev/ipmi.c
--- a/sys/dev/ipmi.c
+++ b/sys/dev/ipmi.c
@@ -1298,7 +1298,7 @@ static int64_t fixexp_a[] = {
 	0x0000000080000000ll /* 1.0/2.0 */,
 	0x000000002aaaaaabll /* 1.0/6.0 */,
 	0x000000000aaaaaabll /* 1.0/24.0 */,
-	0x0000000002222222ll /* 1.0/120.0 */, 
+	0x0000000002222222ll /* 1.0/120.0 */,
 	0x00000000005b05b0ll /* 1.0/720.0 */,
 	0x00000000000d00d0ll /* 1.0/5040.0 */,
 	0x000000000001a01all /* 1.0/40320.0 */
@@ -1316,7 +1316,7 @@ fixmul(int64_t x, int64_t y)
 		x = -x;
 		neg = !neg;
 	}
-	if (y < 0) { 
+	if (y < 0) {
 		y = -y;
 		neg = !neg;
 	}
@@ -1803,7 +1803,7 @@ add_child_sensors(struct ipmi_softc *sc,
 	char			*e;
 	struct ipmi_sensor	*psensor;
 	struct sdrtype1		*s1 = (struct sdrtype1 *)psdr;
-	
+
 	typ = ipmi_sensor_type(sensor_type, ext_type, entity);
 	if (typ == -1) {
 		dbg_printf(5, "Unknown sensor type:%#.2x et:%#.2x sn:%#.2x "
@@ -2509,7 +2509,7 @@ ipmi_ioctl(dev_t dev, u_long cmd, void *
 		error = EOPNOTSUPP;
 		break;
 	default:
-		error = ENODEV;	
+		error = ENODEV;
 		break;
 	}
 
# HG changeset patch
# User Taylor R Campbell <riastradh%NetBSD.org@localhost>
# Date 1733245714 0
#      Tue Dec 03 17:08:34 2024 +0000
# Branch trunk
# Node ID 702bc3da2819f628a6d103b38eaa0e3cbfa40593
# Parent  9e9ab94042dd69884b43d2a67b12adf8013f7972
# EXP-Topic riastradh-pr56568-ipmidelay
ipmi(4): Avoid conflict between ioctl and watchdog/sensor thread.

1. On ioctl IPMICTL_SEND_COMMAND, immediately read a message into a
   user buffer that will persist until IPMICTL_RECEIVE_MSG.  (But
   wait for any concurrent IPMICTL_RECEIVE_MSG to finish first so the
   buffer is available.)

2. On ioctl IPMICTL_RECEIVE_MSG or IPMICTL_RECEIVE_MSG_TRUNC, mark
   the user buffer busy and read out of it with copyout -- without
   holding a mutex.

This way, the watchdog/sensor thread can issue commands while
IPMICTL_SEND_COMMAND and IPMICTL_RECEIVE_MSG are waiting for
copyin/copyout or malloc -- which may be indefinite waits I/O to swap
over the network, for example -- so it doesn't block tickling the
watchdog or updating sensors for envstat.

PR kern/58869: ipmi(4) holds sc_cmd_mtx across copyout, needed for
wdog tickle

diff --git a/sys/dev/ipmi.c b/sys/dev/ipmi.c
--- a/sys/dev/ipmi.c
+++ b/sys/dev/ipmi.c
@@ -986,6 +986,8 @@ ipmi_sendcmd(struct ipmi_softc *sc, int 
 	uint8_t	*buf;
 	int		rc = -1;
 
+	KASSERT(mutex_owned(&sc->sc_cmd_mtx));
+
 	dbg_printf(50, "%s: rssa=%#.2x nfln=%#.2x cmd=%#.2x len=%#.2x\n",
 	    __func__, rssa, NETFN_LUN(netfn, rslun), cmd, txlen);
 	dbg_dump(10, __func__, txlen, data);
@@ -1054,6 +1056,8 @@ ipmi_recvcmd(struct ipmi_softc *sc, int 
 	uint8_t	*buf, rc = 0;
 	int		rawlen;
 
+	KASSERT(mutex_owned(&sc->sc_cmd_mtx));
+
 	/* Need three extra bytes: netfn/cmd/ccode + data */
 	buf = ipmi_buf_acquire(sc, maxlen + 3);
 	if (buf == NULL) {
@@ -1090,6 +1094,9 @@ ipmi_recvcmd(struct ipmi_softc *sc, int 
 static void
 ipmi_delay(struct ipmi_softc *sc, int ms)
 {
+
+	KASSERT(mutex_owned(&sc->sc_cmd_mtx));
+
 	if (cold) {
 		delay(ms * 1000);
 		return;
@@ -2110,21 +2117,12 @@ ipmi_thread(void *cookie)
 	ipmi_enabled = 1;
 
 	mutex_enter(&sc->sc_poll_mtx);
-	sc->sc_thread_ready = true;
-	cv_broadcast(&sc->sc_mode_cv);
 	while (sc->sc_thread_running) {
-		while (sc->sc_mode == IPMI_MODE_COMMAND)
-			cv_wait(&sc->sc_mode_cv, &sc->sc_poll_mtx);
-		sc->sc_mode = IPMI_MODE_ENVSYS;
-
 		if (sc->sc_tickle_due) {
 			ipmi_dotickle(sc);
 			sc->sc_tickle_due = false;
 		}
 		ipmi_refresh_sensors(sc);
-
-		sc->sc_mode = IPMI_MODE_IDLE;
-		cv_broadcast(&sc->sc_mode_cv);
 		cv_timedwait(&sc->sc_poll_cv, &sc->sc_poll_mtx,
 		    SENSOR_REFRESH_RATE);
 	}
@@ -2144,10 +2142,10 @@ ipmi_attach(device_t parent, device_t se
 
 	/* lock around read_sensor so that no one messes with the bmc regs */
 	mutex_init(&sc->sc_cmd_mtx, MUTEX_DEFAULT, IPL_SOFTCLOCK);
+	cv_init(&sc->sc_userbuf_cv, "ipmiuser");
 
 	mutex_init(&sc->sc_poll_mtx, MUTEX_DEFAULT, IPL_SOFTCLOCK);
 	cv_init(&sc->sc_poll_cv, "ipmipoll");
-	cv_init(&sc->sc_mode_cv, "ipmimode");
 
 	if (kthread_create(PRI_NONE, KTHREAD_MUSTJOIN, NULL, ipmi_thread, self,
 	    &sc->sc_kthread, "%s", device_xname(self)) != 0) {
@@ -2204,9 +2202,9 @@ ipmi_detach(device_t self, int flags)
 
 	ipmi_unmap_regs(sc);
 
-	cv_destroy(&sc->sc_mode_cv);
 	cv_destroy(&sc->sc_poll_cv);
 	mutex_destroy(&sc->sc_poll_mtx);
+	cv_destroy(&sc->sc_userbuf_cv);
 	mutex_destroy(&sc->sc_cmd_mtx);
 
 	return 0;
@@ -2255,15 +2253,6 @@ ipmi_watchdog_setmode(struct sysmon_wdog
 	else
 		sc->sc_wdog.smw_period = smwdog->smw_period;
 
-	/* Wait until the device is initialized */
-	rc = 0;
-	mutex_enter(&sc->sc_poll_mtx);
-	while (sc->sc_thread_ready)
-		rc = cv_wait_sig(&sc->sc_mode_cv, &sc->sc_poll_mtx);
-	mutex_exit(&sc->sc_poll_mtx);
-	if (rc)
-		return rc;
-
 	mutex_enter(&sc->sc_cmd_mtx);
 	/* see if we can properly task to the watchdog */
 	rc = ipmi_sendcmd(sc, BMC_SA, BMC_LUN, APP_NETFN,
@@ -2362,12 +2351,12 @@ ipmi_close(dev_t dev, int flag, int fmt,
 	if ((sc = device_lookup_private(&ipmi_cd, unit)) == NULL)
 		return (ENXIO);
 
-	mutex_enter(&sc->sc_poll_mtx);
-	if (sc->sc_mode == IPMI_MODE_COMMAND) {
-		sc->sc_mode = IPMI_MODE_IDLE;
-		cv_broadcast(&sc->sc_mode_cv);
-	}
-	mutex_exit(&sc->sc_poll_mtx);
+	mutex_enter(&sc->sc_cmd_mtx);
+	KASSERT(!sc->sc_userbuf_busy);
+	memset(sc->sc_userbuf, 0, sizeof(sc->sc_userbuf)); /* paranoia */
+	sc->sc_userbuf_len = 0;
+	mutex_exit(&sc->sc_cmd_mtx);
+
 	return 0;
 }
 
@@ -2387,62 +2376,68 @@ ipmi_ioctl(dev_t dev, u_long cmd, void *
 
 	switch (cmd) {
 	case IPMICTL_SEND_COMMAND:
-		mutex_enter(&sc->sc_poll_mtx);
-		while (sc->sc_mode == IPMI_MODE_ENVSYS) {
-			error = cv_wait_sig(&sc->sc_mode_cv, &sc->sc_poll_mtx);
-			if (error == EINTR) {
-				mutex_exit(&sc->sc_poll_mtx);
-				return error;
-			}
-		}
-		sc->sc_mode = IPMI_MODE_COMMAND;
-		mutex_exit(&sc->sc_poll_mtx);
-		break;
-	}
-
-	mutex_enter(&sc->sc_cmd_mtx);
-
-	switch (cmd) {
-	case IPMICTL_SEND_COMMAND:
 		req = data;
-		buf = malloc(IPMI_MAX_RX, M_DEVBUF, M_WAITOK);
-
 		len = req->msg.data_len;
 		if (len < 0 || len > IPMI_MAX_RX) {
 			error = EINVAL;
 			break;
 		}
 
-		/* clear pending result */
-		if (sc->sc_sent)
-			(void)ipmi_recvcmd(sc, IPMI_MAX_RX, &len, buf);
-
 		/* XXX */
 		error = copyin(req->addr, &addr, sizeof(addr));
 		if (error)
 			break;
 
+		buf = malloc(IPMI_MAX_RX, M_DEVBUF, M_WAITOK);
 		error = copyin(req->msg.data, buf, len);
 		if (error)
 			break;
 
-		/* save for receive */
+		/*
+		 * Acquire the lock to serialize ipmi_sendcmd/recvcmd
+		 * and access to the saved command response state.
+		 */
+		mutex_enter(&sc->sc_cmd_mtx);
+
+		/*
+		 * Wait for any concurrent copyouts of the user buffer
+		 * in IPMICTL_RECEIVE_MSG to complete before we can
+		 * reuse the user buffer.
+		 */
+		while (sc->sc_userbuf_busy) {
+			error = cv_wait_sig(&sc->sc_userbuf_cv,
+			    &sc->sc_cmd_mtx);
+			if (error)
+				break;
+		}
+		if (error) {
+			mutex_exit(&sc->sc_cmd_mtx);
+			break;
+		}
+
+		/*
+		 * Send the command and receive the response into the
+		 * user buffer.  Record the rest of the parameters to
+		 * pass back from IPMICTL_RECEIVE_MSG.
+		 */
+		if (ipmi_sendcmd(sc, BMC_SA, 0, req->msg.netfn,
+		    req->msg.cmd, len, buf)) {
+			error = EIO;
+			mutex_exit(&sc->sc_cmd_mtx);
+			break;
+		}
+		sc->sc_error = ipmi_recvcmd(sc, IPMI_MAX_RX,
+		    &sc->sc_userbuf_len, sc->sc_userbuf);
 		sc->sc_msgid = req->msgid;
 		sc->sc_netfn = req->msg.netfn;
 		sc->sc_cmd = req->msg.cmd;
-
-		if (ipmi_sendcmd(sc, BMC_SA, 0, req->msg.netfn,
-		    req->msg.cmd, len, buf)) {
-			error = EIO;
-			break;
-		}
 		sc->sc_sent = true;
+
+		mutex_exit(&sc->sc_cmd_mtx);
 		break;
 	case IPMICTL_RECEIVE_MSG_TRUNC:
 	case IPMICTL_RECEIVE_MSG:
 		recv = data;
-		buf = malloc(IPMI_MAX_RX, M_DEVBUF, M_WAITOK);
-
 		if (recv->msg.data_len < 1) {
 			error = EINVAL;
 			break;
@@ -2453,24 +2448,36 @@ ipmi_ioctl(dev_t dev, u_long cmd, void *
 		if (error)
 			break;
 
+		mutex_enter(&sc->sc_cmd_mtx);
+
+		/*
+		 * Wait for any concurrent copyouts of the user buffer
+		 * in IPMICTL_RECEIVE_MSG to complete.
+		 */
+		while (sc->sc_userbuf_busy) {
+			error = cv_wait_sig(&sc->sc_userbuf_cv,
+			    &sc->sc_cmd_mtx);
+			if (error)
+				break;
+		}
+		if (error) {
+			mutex_exit(&sc->sc_cmd_mtx);
+			break;
+		}
 
 		if (!sc->sc_sent) {
 			error = EIO;
+			mutex_exit(&sc->sc_cmd_mtx);
 			break;
 		}
-
-		len = 0;
-		error = ipmi_recvcmd(sc, IPMI_MAX_RX, &len, buf);
-		if (error < 0) {
-			error = EIO;
-			break;
-		}
-		ccode = (unsigned char)error;
 		sc->sc_sent = false;
+		len = sc->sc_userbuf_len;
+		ccode = (unsigned char)sc->sc_error;
 
 		if (len > recv->msg.data_len - 1) {
 			if (cmd == IPMICTL_RECEIVE_MSG) {
 				error = EMSGSIZE;
+				mutex_exit(&sc->sc_cmd_mtx);
 				break;
 			}
 			len = recv->msg.data_len - 1;
@@ -2484,23 +2491,54 @@ ipmi_ioctl(dev_t dev, u_long cmd, void *
 		recv->msg.cmd = sc->sc_cmd;
 		recv->msg.data_len = len+1;
 
+		/*
+		 * Mark the user buffer busy and drop sc_cmd_mtx so we
+		 * can copyout without blocking watchdog tickle or
+		 * sensor updates.
+		 */
+		sc->sc_userbuf_busy = true;
+		mutex_exit(&sc->sc_cmd_mtx);
+
 		error = copyout(&addr, recv->addr, sizeof(addr));
 		if (error == 0)
 			error = copyout(&ccode, recv->msg.data, 1);
 		if (error == 0)
-			error = copyout(buf, recv->msg.data+1, len);
+			error = copyout(sc->sc_userbuf, recv->msg.data+1, len);
+
+		/*
+		 * Copyout done, release the user buffer and wake any
+		 * processes waiting to send commands.
+		 *
+		 * (Broadcast, not signal -- if there are two processes
+		 * waiting in IPMICTL_SEND_COMMAND, and we only wake
+		 * one which _doesn't_ proceed to IPMICTL_RECEIVE_MSG,
+		 * the other will wait indefinitely.)
+		 */
+		mutex_enter(&sc->sc_cmd_mtx);
+		KASSERT(sc->sc_userbuf_busy);
+		sc->sc_userbuf_busy = false;
+		cv_broadcast(&sc->sc_userbuf_cv);
+		mutex_exit(&sc->sc_cmd_mtx);
 		break;
 	case IPMICTL_SET_MY_ADDRESS_CMD:
+		mutex_enter(&sc->sc_cmd_mtx);
 		sc->sc_address = *(int *)data;
+		mutex_exit(&sc->sc_cmd_mtx);
 		break;
 	case IPMICTL_GET_MY_ADDRESS_CMD:
+		mutex_enter(&sc->sc_cmd_mtx);
 		*(int *)data = sc->sc_address;
+		mutex_exit(&sc->sc_cmd_mtx);
 		break;
 	case IPMICTL_SET_MY_LUN_CMD:
+		mutex_enter(&sc->sc_cmd_mtx);
 		sc->sc_lun = *(int *)data & 0x3;
+		mutex_exit(&sc->sc_cmd_mtx);
 		break;
 	case IPMICTL_GET_MY_LUN_CMD:
+		mutex_enter(&sc->sc_cmd_mtx);
 		*(int *)data = sc->sc_lun;
+		mutex_exit(&sc->sc_cmd_mtx);
 		break;
 	case IPMICTL_SET_GETS_EVENTS_CMD:
 		break;
@@ -2515,19 +2553,6 @@ ipmi_ioctl(dev_t dev, u_long cmd, void *
 
 	if (buf)
 		free(buf, M_DEVBUF);
-
-	mutex_exit(&sc->sc_cmd_mtx);
-
-	switch (cmd) {
-	case IPMICTL_RECEIVE_MSG:
-	case IPMICTL_RECEIVE_MSG_TRUNC:
-		mutex_enter(&sc->sc_poll_mtx);
-		sc->sc_mode = IPMI_MODE_IDLE;
-		cv_broadcast(&sc->sc_mode_cv);
-		mutex_exit(&sc->sc_poll_mtx);
-		break;
-	}
-
 	return error;
 }
 
diff --git a/sys/dev/ipmivar.h b/sys/dev/ipmivar.h
--- a/sys/dev/ipmivar.h
+++ b/sys/dev/ipmivar.h
@@ -95,7 +95,6 @@ struct ipmi_softc {
 
 	kmutex_t		sc_poll_mtx;
 	kcondvar_t		sc_poll_cv;
-	kcondvar_t		sc_mode_cv;
 
 	kmutex_t		sc_cmd_mtx;
 
@@ -103,7 +102,6 @@ struct ipmi_softc {
 
 	struct ipmi_sensor	*current_sensor;
 	bool			sc_thread_running;
-	bool			sc_thread_ready;
 	bool			sc_tickle_due;
 	struct sysmon_wdog	sc_wdog;
 	struct sysmon_envsys	*sc_envsys;
@@ -113,12 +111,13 @@ struct ipmi_softc {
 	char		sc_buf[IPMI_MAX_RX + 3];
 	bool		sc_buf_rsvd;
 
-	/* request busy */
-	int		sc_mode;
-#define IPMI_MODE_IDLE		0
-#define IPMI_MODE_COMMAND	1
-#define IPMI_MODE_ENVSYS	2
+	/* user command response buffer */
+	kcondvar_t	sc_userbuf_cv;
+	int		sc_userbuf_len;
+	bool		sc_userbuf_busy;
 	bool		sc_sent;
+	unsigned char	sc_error;
+	char		sc_userbuf[IPMI_MAX_RX + 3];
 
 	/* dummy */
 	int		sc_address;
diff -r eab5d84e73c8 -r 702bc3da2819 sys/dev/ipmi.c
--- a/sys/dev/ipmi.c	Tue Dec 03 00:13:12 2024 +0000
+++ b/sys/dev/ipmi.c	Tue Dec 03 17:08:34 2024 +0000
@@ -355,9 +355,7 @@ bmc_io_wait_sleep(struct ipmi_softc *sc,
 		v = bmc_read(sc, offset);
 		if ((v & mask) == value)
 			return v;
-		mutex_enter(&sc->sc_sleep_mtx);
-		cv_timedwait(&sc->sc_cmd_sleep, &sc->sc_sleep_mtx, 1);
-		mutex_exit(&sc->sc_sleep_mtx);
+		kpause("ipmicmd", /*intr*/false, /*timo*/1, /*mtx*/NULL);
 	}
 	return -1;
 }
@@ -988,6 +986,8 @@ ipmi_sendcmd(struct ipmi_softc *sc, int 
 	uint8_t	*buf;
 	int		rc = -1;
 
+	KASSERT(mutex_owned(&sc->sc_cmd_mtx));
+
 	dbg_printf(50, "%s: rssa=%#.2x nfln=%#.2x cmd=%#.2x len=%#.2x\n",
 	    __func__, rssa, NETFN_LUN(netfn, rslun), cmd, txlen);
 	dbg_dump(10, __func__, txlen, data);
@@ -1056,6 +1056,8 @@ ipmi_recvcmd(struct ipmi_softc *sc, int 
 	uint8_t	*buf, rc = 0;
 	int		rawlen;
 
+	KASSERT(mutex_owned(&sc->sc_cmd_mtx));
+
 	/* Need three extra bytes: netfn/cmd/ccode + data */
 	buf = ipmi_buf_acquire(sc, maxlen + 3);
 	if (buf == NULL) {
@@ -1092,13 +1094,14 @@ ipmi_recvcmd(struct ipmi_softc *sc, int 
 static void
 ipmi_delay(struct ipmi_softc *sc, int ms)
 {
+
+	KASSERT(mutex_owned(&sc->sc_cmd_mtx));
+
 	if (cold) {
 		delay(ms * 1000);
 		return;
 	}
-	mutex_enter(&sc->sc_sleep_mtx);
-	cv_timedwait(&sc->sc_cmd_sleep, &sc->sc_sleep_mtx, mstohz(ms));
-	mutex_exit(&sc->sc_sleep_mtx);
+	kpause("ipmicmd", /*intr*/false, /*timo*/mstohz(ms), /*mtx*/NULL);
 }
 
 /* Read a partial SDR entry */
@@ -1302,7 +1305,7 @@ static int64_t fixexp_a[] = {
 	0x0000000080000000ll /* 1.0/2.0 */,
 	0x000000002aaaaaabll /* 1.0/6.0 */,
 	0x000000000aaaaaabll /* 1.0/24.0 */,
-	0x0000000002222222ll /* 1.0/120.0 */, 
+	0x0000000002222222ll /* 1.0/120.0 */,
 	0x00000000005b05b0ll /* 1.0/720.0 */,
 	0x00000000000d00d0ll /* 1.0/5040.0 */,
 	0x000000000001a01all /* 1.0/40320.0 */
@@ -1320,7 +1323,7 @@ fixmul(int64_t x, int64_t y)
 		x = -x;
 		neg = !neg;
 	}
-	if (y < 0) { 
+	if (y < 0) {
 		y = -y;
 		neg = !neg;
 	}
@@ -1807,7 +1810,7 @@ add_child_sensors(struct ipmi_softc *sc,
 	char			*e;
 	struct ipmi_sensor	*psensor;
 	struct sdrtype1		*s1 = (struct sdrtype1 *)psdr;
-	
+
 	typ = ipmi_sensor_type(sensor_type, ext_type, entity);
 	if (typ == -1) {
 		dbg_printf(5, "Unknown sensor type:%#.2x et:%#.2x sn:%#.2x "
@@ -1967,12 +1970,10 @@ ipmi_match(device_t parent, cfdata_t cf,
 	sc.sc_if->probe(&sc);
 
 	mutex_init(&sc.sc_cmd_mtx, MUTEX_DEFAULT, IPL_SOFTCLOCK);
-	cv_init(&sc.sc_cmd_sleep, "ipmimtch");
 
 	if (ipmi_get_device_id(&sc, NULL) == 0)
 		rv = 1;
 
-	cv_destroy(&sc.sc_cmd_sleep);
 	mutex_destroy(&sc.sc_cmd_mtx);
 	ipmi_unmap_regs(&sc);
 
@@ -2116,21 +2117,12 @@ ipmi_thread(void *cookie)
 	ipmi_enabled = 1;
 
 	mutex_enter(&sc->sc_poll_mtx);
-	sc->sc_thread_ready = true;
-	cv_broadcast(&sc->sc_mode_cv);
 	while (sc->sc_thread_running) {
-		while (sc->sc_mode == IPMI_MODE_COMMAND)
-			cv_wait(&sc->sc_mode_cv, &sc->sc_poll_mtx);
-		sc->sc_mode = IPMI_MODE_ENVSYS;
-
 		if (sc->sc_tickle_due) {
 			ipmi_dotickle(sc);
 			sc->sc_tickle_due = false;
 		}
 		ipmi_refresh_sensors(sc);
-
-		sc->sc_mode = IPMI_MODE_IDLE;
-		cv_broadcast(&sc->sc_mode_cv);
 		cv_timedwait(&sc->sc_poll_cv, &sc->sc_poll_mtx,
 		    SENSOR_REFRESH_RATE);
 	}
@@ -2150,12 +2142,10 @@ ipmi_attach(device_t parent, device_t se
 
 	/* lock around read_sensor so that no one messes with the bmc regs */
 	mutex_init(&sc->sc_cmd_mtx, MUTEX_DEFAULT, IPL_SOFTCLOCK);
-	mutex_init(&sc->sc_sleep_mtx, MUTEX_DEFAULT, IPL_SOFTCLOCK);
-	cv_init(&sc->sc_cmd_sleep, "ipmicmd");
+	cv_init(&sc->sc_userbuf_cv, "ipmiuser");
 
 	mutex_init(&sc->sc_poll_mtx, MUTEX_DEFAULT, IPL_SOFTCLOCK);
 	cv_init(&sc->sc_poll_cv, "ipmipoll");
-	cv_init(&sc->sc_mode_cv, "ipmimode");
 
 	if (kthread_create(PRI_NONE, KTHREAD_MUSTJOIN, NULL, ipmi_thread, self,
 	    &sc->sc_kthread, "%s", device_xname(self)) != 0) {
@@ -2212,11 +2202,9 @@ ipmi_detach(device_t self, int flags)
 
 	ipmi_unmap_regs(sc);
 
-	cv_destroy(&sc->sc_mode_cv);
 	cv_destroy(&sc->sc_poll_cv);
 	mutex_destroy(&sc->sc_poll_mtx);
-	cv_destroy(&sc->sc_cmd_sleep);
-	mutex_destroy(&sc->sc_sleep_mtx);
+	cv_destroy(&sc->sc_userbuf_cv);
 	mutex_destroy(&sc->sc_cmd_mtx);
 
 	return 0;
@@ -2265,15 +2253,6 @@ ipmi_watchdog_setmode(struct sysmon_wdog
 	else
 		sc->sc_wdog.smw_period = smwdog->smw_period;
 
-	/* Wait until the device is initialized */
-	rc = 0;
-	mutex_enter(&sc->sc_poll_mtx);
-	while (sc->sc_thread_ready)
-		rc = cv_wait_sig(&sc->sc_mode_cv, &sc->sc_poll_mtx);
-	mutex_exit(&sc->sc_poll_mtx);
-	if (rc)
-		return rc;
-
 	mutex_enter(&sc->sc_cmd_mtx);
 	/* see if we can properly task to the watchdog */
 	rc = ipmi_sendcmd(sc, BMC_SA, BMC_LUN, APP_NETFN,
@@ -2372,12 +2351,12 @@ ipmi_close(dev_t dev, int flag, int fmt,
 	if ((sc = device_lookup_private(&ipmi_cd, unit)) == NULL)
 		return (ENXIO);
 
-	mutex_enter(&sc->sc_poll_mtx);
-	if (sc->sc_mode == IPMI_MODE_COMMAND) {
-		sc->sc_mode = IPMI_MODE_IDLE;
-		cv_broadcast(&sc->sc_mode_cv);
-	}
-	mutex_exit(&sc->sc_poll_mtx);
+	mutex_enter(&sc->sc_cmd_mtx);
+	KASSERT(!sc->sc_userbuf_busy);
+	memset(sc->sc_userbuf, 0, sizeof(sc->sc_userbuf)); /* paranoia */
+	sc->sc_userbuf_len = 0;
+	mutex_exit(&sc->sc_cmd_mtx);
+
 	return 0;
 }
 
@@ -2397,62 +2376,68 @@ ipmi_ioctl(dev_t dev, u_long cmd, void *
 
 	switch (cmd) {
 	case IPMICTL_SEND_COMMAND:
-		mutex_enter(&sc->sc_poll_mtx);
-		while (sc->sc_mode == IPMI_MODE_ENVSYS) {
-			error = cv_wait_sig(&sc->sc_mode_cv, &sc->sc_poll_mtx);
-			if (error == EINTR) {
-				mutex_exit(&sc->sc_poll_mtx);
-				return error;
-			}
-		}
-		sc->sc_mode = IPMI_MODE_COMMAND;
-		mutex_exit(&sc->sc_poll_mtx);
-		break;
-	}
-
-	mutex_enter(&sc->sc_cmd_mtx);
-
-	switch (cmd) {
-	case IPMICTL_SEND_COMMAND:
 		req = data;
-		buf = malloc(IPMI_MAX_RX, M_DEVBUF, M_WAITOK);
-
 		len = req->msg.data_len;
 		if (len < 0 || len > IPMI_MAX_RX) {
 			error = EINVAL;
 			break;
 		}
 
-		/* clear pending result */
-		if (sc->sc_sent)
-			(void)ipmi_recvcmd(sc, IPMI_MAX_RX, &len, buf);
-
 		/* XXX */
 		error = copyin(req->addr, &addr, sizeof(addr));
 		if (error)
 			break;
 
+		buf = malloc(IPMI_MAX_RX, M_DEVBUF, M_WAITOK);
 		error = copyin(req->msg.data, buf, len);
 		if (error)
 			break;
 
-		/* save for receive */
+		/*
+		 * Acquire the lock to serialize ipmi_sendcmd/recvcmd
+		 * and access to the saved command response state.
+		 */
+		mutex_enter(&sc->sc_cmd_mtx);
+
+		/*
+		 * Wait for any concurrent copyouts of the user buffer
+		 * in IPMICTL_RECEIVE_MSG to complete before we can
+		 * reuse the user buffer.
+		 */
+		while (sc->sc_userbuf_busy) {
+			error = cv_wait_sig(&sc->sc_userbuf_cv,
+			    &sc->sc_cmd_mtx);
+			if (error)
+				break;
+		}
+		if (error) {
+			mutex_exit(&sc->sc_cmd_mtx);
+			break;
+		}
+
+		/*
+		 * Send the command and receive the response into the
+		 * user buffer.  Record the rest of the parameters to
+		 * pass back from IPMICTL_RECEIVE_MSG.
+		 */
+		if (ipmi_sendcmd(sc, BMC_SA, 0, req->msg.netfn,
+		    req->msg.cmd, len, buf)) {
+			error = EIO;
+			mutex_exit(&sc->sc_cmd_mtx);
+			break;
+		}
+		sc->sc_error = ipmi_recvcmd(sc, IPMI_MAX_RX,
+		    &sc->sc_userbuf_len, sc->sc_userbuf);
 		sc->sc_msgid = req->msgid;
 		sc->sc_netfn = req->msg.netfn;
 		sc->sc_cmd = req->msg.cmd;
-
-		if (ipmi_sendcmd(sc, BMC_SA, 0, req->msg.netfn,
-		    req->msg.cmd, len, buf)) {
-			error = EIO;
-			break;
-		}
 		sc->sc_sent = true;
+
+		mutex_exit(&sc->sc_cmd_mtx);
 		break;
 	case IPMICTL_RECEIVE_MSG_TRUNC:
 	case IPMICTL_RECEIVE_MSG:
 		recv = data;
-		buf = malloc(IPMI_MAX_RX, M_DEVBUF, M_WAITOK);
-
 		if (recv->msg.data_len < 1) {
 			error = EINVAL;
 			break;
@@ -2463,24 +2448,36 @@ ipmi_ioctl(dev_t dev, u_long cmd, void *
 		if (error)
 			break;
 
+		mutex_enter(&sc->sc_cmd_mtx);
+
+		/*
+		 * Wait for any concurrent copyouts of the user buffer
+		 * in IPMICTL_RECEIVE_MSG to complete.
+		 */
+		while (sc->sc_userbuf_busy) {
+			error = cv_wait_sig(&sc->sc_userbuf_cv,
+			    &sc->sc_cmd_mtx);
+			if (error)
+				break;
+		}
+		if (error) {
+			mutex_exit(&sc->sc_cmd_mtx);
+			break;
+		}
 
 		if (!sc->sc_sent) {
 			error = EIO;
+			mutex_exit(&sc->sc_cmd_mtx);
 			break;
 		}
-
-		len = 0;
-		error = ipmi_recvcmd(sc, IPMI_MAX_RX, &len, buf);
-		if (error < 0) {
-			error = EIO;
-			break;
-		}
-		ccode = (unsigned char)error;
 		sc->sc_sent = false;
+		len = sc->sc_userbuf_len;
+		ccode = (unsigned char)sc->sc_error;
 
 		if (len > recv->msg.data_len - 1) {
 			if (cmd == IPMICTL_RECEIVE_MSG) {
 				error = EMSGSIZE;
+				mutex_exit(&sc->sc_cmd_mtx);
 				break;
 			}
 			len = recv->msg.data_len - 1;
@@ -2494,23 +2491,54 @@ ipmi_ioctl(dev_t dev, u_long cmd, void *
 		recv->msg.cmd = sc->sc_cmd;
 		recv->msg.data_len = len+1;
 
+		/*
+		 * Mark the user buffer busy and drop sc_cmd_mtx so we
+		 * can copyout without blocking watchdog tickle or
+		 * sensor updates.
+		 */
+		sc->sc_userbuf_busy = true;
+		mutex_exit(&sc->sc_cmd_mtx);
+
 		error = copyout(&addr, recv->addr, sizeof(addr));
 		if (error == 0)
 			error = copyout(&ccode, recv->msg.data, 1);
 		if (error == 0)
-			error = copyout(buf, recv->msg.data+1, len);
+			error = copyout(sc->sc_userbuf, recv->msg.data+1, len);
+
+		/*
+		 * Copyout done, release the user buffer and wake any
+		 * processes waiting to send commands.
+		 *
+		 * (Broadcast, not signal -- if there are two processes
+		 * waiting in IPMICTL_SEND_COMMAND, and we only wake
+		 * one which _doesn't_ proceed to IPMICTL_RECEIVE_MSG,
+		 * the other will wait indefinitely.)
+		 */
+		mutex_enter(&sc->sc_cmd_mtx);
+		KASSERT(sc->sc_userbuf_busy);
+		sc->sc_userbuf_busy = false;
+		cv_broadcast(&sc->sc_userbuf_cv);
+		mutex_exit(&sc->sc_cmd_mtx);
 		break;
 	case IPMICTL_SET_MY_ADDRESS_CMD:
+		mutex_enter(&sc->sc_cmd_mtx);
 		sc->sc_address = *(int *)data;
+		mutex_exit(&sc->sc_cmd_mtx);
 		break;
 	case IPMICTL_GET_MY_ADDRESS_CMD:
+		mutex_enter(&sc->sc_cmd_mtx);
 		*(int *)data = sc->sc_address;
+		mutex_exit(&sc->sc_cmd_mtx);
 		break;
 	case IPMICTL_SET_MY_LUN_CMD:
+		mutex_enter(&sc->sc_cmd_mtx);
 		sc->sc_lun = *(int *)data & 0x3;
+		mutex_exit(&sc->sc_cmd_mtx);
 		break;
 	case IPMICTL_GET_MY_LUN_CMD:
+		mutex_enter(&sc->sc_cmd_mtx);
 		*(int *)data = sc->sc_lun;
+		mutex_exit(&sc->sc_cmd_mtx);
 		break;
 	case IPMICTL_SET_GETS_EVENTS_CMD:
 		break;
@@ -2519,25 +2547,12 @@ ipmi_ioctl(dev_t dev, u_long cmd, void *
 		error = EOPNOTSUPP;
 		break;
 	default:
-		error = ENODEV;	
+		error = ENODEV;
 		break;
 	}
 
 	if (buf)
 		free(buf, M_DEVBUF);
-
-	mutex_exit(&sc->sc_cmd_mtx);
-
-	switch (cmd) {
-	case IPMICTL_RECEIVE_MSG:
-	case IPMICTL_RECEIVE_MSG_TRUNC:
-		mutex_enter(&sc->sc_poll_mtx);
-		sc->sc_mode = IPMI_MODE_IDLE;
-		cv_broadcast(&sc->sc_mode_cv);
-		mutex_exit(&sc->sc_poll_mtx);
-		break;
-	}
-
 	return error;
 }
 
diff -r eab5d84e73c8 -r 702bc3da2819 sys/dev/ipmivar.h
--- a/sys/dev/ipmivar.h	Tue Dec 03 00:13:12 2024 +0000
+++ b/sys/dev/ipmivar.h	Tue Dec 03 17:08:34 2024 +0000
@@ -24,17 +24,20 @@
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
- *
  */
 
-#include <sys/mutex.h>
-#include <sys/condvar.h>
-
-#include <dev/sysmon/sysmonvar.h>
-
 #ifndef _IPMIVAR_H_
 #define _IPMIVAR_H_
 
+#include <sys/types.h>
+
+#include <sys/bus.h>
+#include <sys/condvar.h>
+#include <sys/ipmi.h>
+#include <sys/mutex.h>
+
+#include <dev/sysmon/sysmonvar.h>
+
 #define IPMI_IF_KCS		1
 #define IPMI_IF_SMIC		2
 #define IPMI_IF_BT		3
@@ -92,32 +95,29 @@ struct ipmi_softc {
 
 	kmutex_t		sc_poll_mtx;
 	kcondvar_t		sc_poll_cv;
-	kcondvar_t		sc_mode_cv;
 
 	kmutex_t		sc_cmd_mtx;
-	kmutex_t		sc_sleep_mtx;
-	kcondvar_t		sc_cmd_sleep;
 
 	struct ipmi_bmc_args	*sc_iowait_args;
 
 	struct ipmi_sensor	*current_sensor;
 	bool			sc_thread_running;
-	bool			sc_thread_ready;
 	bool			sc_tickle_due;
 	struct sysmon_wdog	sc_wdog;
 	struct sysmon_envsys	*sc_envsys;
 	envsys_data_t		*sc_sensor;
 	int 		sc_nsensors; /* total number of sensors */
 
-	char		sc_buf[1024 + 3]; /* IPMI_MAX_RX + 3 */
+	char		sc_buf[IPMI_MAX_RX + 3];
 	bool		sc_buf_rsvd;
 
-	/* request busy */
-	int		sc_mode;
-#define IPMI_MODE_IDLE		0
-#define IPMI_MODE_COMMAND	1
-#define IPMI_MODE_ENVSYS	2
+	/* user command response buffer */
+	kcondvar_t	sc_userbuf_cv;
+	int		sc_userbuf_len;
+	bool		sc_userbuf_busy;
 	bool		sc_sent;
+	unsigned char	sc_error;
+	char		sc_userbuf[IPMI_MAX_RX + 3];
 
 	/* dummy */
 	int		sc_address;
diff -r eab5d84e73c8 -r 702bc3da2819 sys/sys/ipmi.h
--- a/sys/sys/ipmi.h	Tue Dec 03 00:13:12 2024 +0000
+++ b/sys/sys/ipmi.h	Tue Dec 03 17:08:34 2024 +0000
@@ -32,6 +32,8 @@
 #ifndef _SYS_IPMI_H_
 #define	_SYS_IPMI_H_
 
+#include <sys/ioccom.h>
+
 #define	IPMI_MAX_ADDR_SIZE		0x20
 #define IPMI_MAX_RX			1024
 #define	IPMI_BMC_SLAVE_ADDR		0x20
@@ -99,7 +101,7 @@ struct ipmi_ipmb_addr {
 #define IPMICTL_SET_GETS_EVENTS_CMD     _IOW('i', 16, int)
 #define IPMICTL_SET_MY_ADDRESS_CMD      _IOW('i', 17, unsigned int)
 #define IPMICTL_GET_MY_ADDRESS_CMD      _IOR('i', 18, unsigned int)
-#define IPMICTL_SET_MY_LUN_CMD          _IOW('i', 19, unsigned int) 
+#define IPMICTL_SET_MY_LUN_CMD          _IOW('i', 19, unsigned int)
 #define IPMICTL_GET_MY_LUN_CMD          _IOR('i', 20, unsigned int)
 
 #endif /* !_SYS_IPMI_H_ */


Home | Main Index | Thread Index | Old Index