tech-kern archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[PATCH] xhci: Reduce ring memory usage
Hello.
Most devices use only a few endpoints, but the current xhci code allocates
rings for all 31 endpoints in the slot when a device is connected.
This patch defers ring memory allocation until usbd_open_pipe opens
the endpoint, and allocates one ring per opened endpoint.
For example, an ordinary network device uses 4 endpoints.
On amd64 it requires 192296 bytes with the old code, but only
25096 bytes with the new code.
Old:
sizeof xhci_slot 1832
ring dma buf 4096 (per endpoint) x 31
xr_cookie 2048 (per endpoint) x 31
1832 + (4096 + 2048) x 31 = 192296
New:
sizeof xhci_slot 296
sizeof xhci_ring 56 (per endpoint) x 4
ring dma buf 4096 (per endpoint) x 4
xr_cookie 2048 (per endpoint) x 4
296 + (56 + 4096 + 2048) x 4 = 25096
I've replaced xhci_endpoint in xhci_slot with xhci_ring *[], as
xhci_endpoint contains nothing other than an xhci_ring.
--- src/sys/dev/usb/xhcivar.h.orig 2019-01-07 07:03:37.277230389 +0000
+++ src/sys/dev/usb/xhcivar.h 2020-03-14 07:08:44.887226324 +0000
@@ -32,6 +32,7 @@
#include <sys/pool.h>
#define XHCI_XFER_NTRB 20
+#define XHCI_MAX_DCI 31
struct xhci_soft_trb {
uint64_t trb_0;
@@ -70,7 +71,7 @@ struct xhci_endpoint {
struct xhci_slot {
usb_dma_t xs_dc_dma; /* device context page */
usb_dma_t xs_ic_dma; /* input context page */
- struct xhci_endpoint xs_ep[32]; /* endpoints */
+ struct xhci_ring *xs_xr[XHCI_MAX_DCI+1]; /* transfer ring */
u_int xs_idx; /* slot index */
};
@@ -114,8 +115,8 @@ struct xhci_softc {
struct xhci_slot * sc_slots;
- struct xhci_ring sc_cr; /* command ring */
- struct xhci_ring sc_er; /* event ring */
+ struct xhci_ring *sc_cr; /* command ring */
+ struct xhci_ring *sc_er; /* event ring */
usb_dma_t sc_eventst_dma;
usb_dma_t sc_dcbaa_dma;
--- src/sys/dev/usb/xhci.c.orig 2020-03-14 03:10:50.091960001 +0000
+++ src/sys/dev/usb/xhci.c 2020-03-16 04:36:02.901525857 +0000
@@ -164,7 +164,7 @@ static usbd_status xhci_do_command(struc
static usbd_status xhci_do_command_locked(struct xhci_softc * const,
struct xhci_soft_trb * const, int);
static usbd_status xhci_init_slot(struct usbd_device *, uint32_t);
-static void xhci_free_slot(struct xhci_softc *, struct xhci_slot *, int, int);
+static void xhci_free_slot(struct xhci_softc *, struct xhci_slot *);
static usbd_status xhci_set_address(struct usbd_device *, uint32_t, bool);
static usbd_status xhci_enable_slot(struct xhci_softc * const,
uint8_t * const);
@@ -175,8 +175,9 @@ static void xhci_set_dcba(struct xhci_so
static usbd_status xhci_update_ep0_mps(struct xhci_softc * const,
struct xhci_slot * const, u_int);
static usbd_status xhci_ring_init(struct xhci_softc * const,
- struct xhci_ring * const, size_t, size_t);
-static void xhci_ring_free(struct xhci_softc * const, struct xhci_ring * const);
+ struct xhci_ring **, size_t, size_t);
+static void xhci_ring_free(struct xhci_softc * const,
+ struct xhci_ring ** const);
static void xhci_setup_ctx(struct usbd_pipe *);
static void xhci_setup_route(struct usbd_pipe *, uint32_t *);
@@ -1194,20 +1195,20 @@ xhci_init(struct xhci_softc *sc)
struct xhci_erste *erst;
erst = KERNADDR(&sc->sc_eventst_dma, 0);
- erst[0].erste_0 = htole64(xhci_ring_trbp(&sc->sc_er, 0));
- erst[0].erste_2 = htole32(sc->sc_er.xr_ntrb);
+ erst[0].erste_0 = htole64(xhci_ring_trbp(sc->sc_er, 0));
+ erst[0].erste_2 = htole32(sc->sc_er->xr_ntrb);
erst[0].erste_3 = htole32(0);
usb_syncmem(&sc->sc_eventst_dma, 0,
XHCI_ERSTE_SIZE * XHCI_EVENT_RING_SEGMENTS, BUS_DMASYNC_PREWRITE);
xhci_rt_write_4(sc, XHCI_ERSTSZ(0), XHCI_EVENT_RING_SEGMENTS);
xhci_rt_write_8(sc, XHCI_ERSTBA(0), DMAADDR(&sc->sc_eventst_dma, 0));
- xhci_rt_write_8(sc, XHCI_ERDP(0), xhci_ring_trbp(&sc->sc_er, 0) |
+ xhci_rt_write_8(sc, XHCI_ERDP(0), xhci_ring_trbp(sc->sc_er, 0) |
XHCI_ERDP_LO_BUSY);
xhci_op_write_8(sc, XHCI_DCBAAP, DMAADDR(&sc->sc_dcbaa_dma, 0));
- xhci_op_write_8(sc, XHCI_CRCR, xhci_ring_trbp(&sc->sc_cr, 0) |
- sc->sc_cr.xr_cs);
+ xhci_op_write_8(sc, XHCI_CRCR, xhci_ring_trbp(sc->sc_cr, 0) |
+ sc->sc_cr->xr_cs);
xhci_op_barrier(sc, 0, 4, BUS_SPACE_BARRIER_WRITE);
@@ -1543,7 +1544,7 @@ xhci_set_dequeue_locked(struct usbd_pipe
struct xhci_softc * const sc = XHCI_PIPE2SC(pipe);
struct xhci_slot * const xs = pipe->up_dev->ud_hcpriv;
const u_int dci = xhci_ep_get_dci(pipe->up_endpoint->ue_edesc);
- struct xhci_ring * const xr = &xs->xs_ep[dci].xe_tr;
+ struct xhci_ring * const xr = xs->xs_xr[dci];
struct xhci_soft_trb trb;
usbd_status err;
@@ -1551,6 +1552,7 @@ xhci_set_dequeue_locked(struct usbd_pipe
XHCIHIST_CALLARGS("slot %ju dci %ju", xs->xs_idx, dci, 0, 0);
KASSERT(mutex_owned(&sc->sc_lock));
+ KASSERT(xr != NULL);
xhci_host_dequeue(xr);
@@ -1588,8 +1590,11 @@ xhci_open(struct usbd_pipe *pipe)
{
struct usbd_device * const dev = pipe->up_dev;
struct xhci_softc * const sc = XHCI_BUS2SC(dev->ud_bus);
+ struct xhci_slot * const xs = pipe->up_dev->ud_hcpriv;
usb_endpoint_descriptor_t * const ed = pipe->up_endpoint->ue_edesc;
+ const u_int dci = xhci_ep_get_dci(ed);
const uint8_t xfertype = UE_GET_XFERTYPE(ed->bmAttributes);
+ usbd_status err;
XHCIHIST_FUNC();
XHCIHIST_CALLARGS("addr %jd depth %jd port %jd speed %jd", dev->ud_addr,
@@ -1640,6 +1645,17 @@ xhci_open(struct usbd_pipe *pipe)
break;
}
+ KASSERT(xs != NULL);
+ KASSERT(xs->xs_xr[dci] == NULL);
+
+ /* allocate transfer ring */
+ err = xhci_ring_init(sc, &xs->xs_xr[dci], XHCI_TRANSFER_RING_TRBS,
+ XHCI_TRB_ALIGN);
+ if (err) {
+ DPRINTFN(1, "ring alloc failed %jd", err, 0, 0, 0);
+ return err;
+ }
+
if (ed->bEndpointAddress != USB_CONTROL_ENDPOINT)
return xhci_configure_endpoint(pipe);
@@ -1681,6 +1697,7 @@ xhci_close_pipe(struct usbd_pipe *pipe)
if (dci == XHCI_DCI_EP_CONTROL) {
DPRINTFN(4, "closing ep0", 0, 0, 0, 0);
+ /* This frees all rings */
xhci_disable_slot(sc, xs->xs_idx);
return;
}
@@ -1702,7 +1719,7 @@ xhci_close_pipe(struct usbd_pipe *pipe)
cp[0] = htole32(XHCI_SCTX_0_CTX_NUM_SET(dci));
/* configure ep context performs an implicit dequeue */
- xhci_host_dequeue(&xs->xs_ep[dci].xe_tr);
+ xhci_host_dequeue(xs->xs_xr[dci]);
/* sync input contexts before they are read from memory */
usb_syncmem(&xs->xs_ic_dma, 0, sc->sc_pgsz, BUS_DMASYNC_PREWRITE);
@@ -1714,6 +1731,8 @@ xhci_close_pipe(struct usbd_pipe *pipe)
(void)xhci_do_command_locked(sc, &trb, USBD_DEFAULT_TIMEOUT);
usb_syncmem(&xs->xs_dc_dma, 0, sc->sc_pgsz, BUS_DMASYNC_POSTREAD);
+
+ xhci_ring_free(sc, &xs->xs_xr[dci]);
}
/*
@@ -1809,7 +1828,7 @@ xhci_clear_endpoint_stall_async_task(voi
struct xhci_softc * const sc = XHCI_XFER2SC(xfer);
struct xhci_slot * const xs = xfer->ux_pipe->up_dev->ud_hcpriv;
const u_int dci = xhci_ep_get_dci(xfer->ux_pipe->up_endpoint->ue_edesc);
- struct xhci_ring * const tr = &xs->xs_ep[dci].xe_tr;
+ struct xhci_ring * const tr = xs->xs_xr[dci];
XHCIHIST_FUNC();
XHCIHIST_CALLARGS("xfer %#jx slot %ju dci %ju", (uintptr_t)xfer, xs->xs_idx,
@@ -1828,6 +1847,8 @@ xhci_clear_endpoint_stall_async_task(voi
return;
}
+ KASSERT(tr != NULL);
+
xhci_reset_endpoint(xfer->ux_pipe);
xhci_set_dequeue(xfer->ux_pipe);
@@ -1913,9 +1934,10 @@ xhci_event_transfer(struct xhci_softc *
slot = XHCI_TRB_3_SLOT_GET(trb_3);
dci = XHCI_TRB_3_EP_GET(trb_3);
xs = &sc->sc_slots[slot];
- xr = &xs->xs_ep[dci].xe_tr;
+ xr = xs->xs_xr[dci];
/* sanity check */
+ KASSERT(xr != NULL);
KASSERTMSG(xs->xs_idx != 0 && xs->xs_idx <= sc->sc_maxslots,
"invalid xs_idx %u slot %u", xs->xs_idx, slot);
@@ -2154,7 +2176,7 @@ xhci_softintr(void *v)
{
struct usbd_bus * const bus = v;
struct xhci_softc * const sc = XHCI_BUS2SC(bus);
- struct xhci_ring * const er = &sc->sc_er;
+ struct xhci_ring * const er = sc->sc_er;
struct xhci_trb *trb;
int i, j, k;
@@ -2341,27 +2363,22 @@ xhci_new_device(device_t parent, struct
up->up_dev = dev;
- /* Establish the default pipe. */
- err = usbd_setup_pipe(dev, 0, &dev->ud_ep0, USBD_DEFAULT_INTERVAL,
- &dev->ud_pipe0);
- if (err) {
- goto bad;
- }
-
dd = &dev->ud_ddesc;
if (depth == 0 && port == 0) {
KASSERT(bus->ub_devices[USB_ROOTHUB_INDEX] == NULL);
bus->ub_devices[USB_ROOTHUB_INDEX] = dev;
- err = usbd_get_initial_ddesc(dev, dd);
+
+ /* Establish the default pipe. */
+ err = usbd_setup_pipe(dev, 0, &dev->ud_ep0,
+ USBD_DEFAULT_INTERVAL, &dev->ud_pipe0);
if (err) {
- DPRINTFN(1, "get_initial_ddesc %ju", err, 0, 0, 0);
+ DPRINTFN(1, "setup default pipe failed %jd", err,0,0,0);
goto bad;
}
-
- err = usbd_reload_device_desc(dev);
+ err = usbd_get_initial_ddesc(dev, dd);
if (err) {
- DPRINTFN(1, "reload desc %ju", err, 0, 0, 0);
+ DPRINTFN(1, "get_initial_ddesc %ju", err, 0, 0, 0);
goto bad;
}
} else {
@@ -2393,6 +2410,18 @@ xhci_new_device(device_t parent, struct
goto bad;
}
+ /*
+ * We have to establish the default pipe _after_ slot
+ * structure has been prepared.
+ */
+ err = usbd_setup_pipe(dev, 0, &dev->ud_ep0,
+ USBD_DEFAULT_INTERVAL, &dev->ud_pipe0);
+ if (err) {
+ DPRINTFN(1, "setup default pipe failed %jd", err, 0, 0,
+ 0);
+ goto bad;
+ }
+
/* 4.3.4 Address Assignment */
err = xhci_set_address(dev, slot, false);
if (err) {
@@ -2449,12 +2478,12 @@ xhci_new_device(device_t parent, struct
DPRINTFN(1, "update mps of ep0 %ju", err, 0, 0, 0);
goto bad;
}
+ }
- err = usbd_reload_device_desc(dev);
- if (err) {
- DPRINTFN(1, "reload desc %ju", err, 0, 0, 0);
- goto bad;
- }
+ err = usbd_reload_device_desc(dev);
+ if (err) {
+ DPRINTFN(1, "reload desc %ju", err, 0, 0, 0);
+ goto bad;
}
DPRINTFN(1, "adding unit addr=%jd, rev=%02jx,",
@@ -2486,35 +2515,49 @@ xhci_new_device(device_t parent, struct
}
static usbd_status
-xhci_ring_init(struct xhci_softc * const sc, struct xhci_ring * const xr,
+xhci_ring_init(struct xhci_softc * const sc, struct xhci_ring **xrp,
size_t ntrb, size_t align)
{
usbd_status err;
size_t size = ntrb * XHCI_TRB_SIZE;
+ struct xhci_ring *xr;
XHCIHIST_FUNC();
XHCIHIST_CALLARGS("xr %#jx ntrb %#jx align %#jx",
- (uintptr_t)xr, ntrb, align, 0);
+ (uintptr_t)*xrp, ntrb, align, 0);
+
+ xr = kmem_zalloc(sizeof(struct xhci_ring), KM_SLEEP);
+ DPRINTFN(1, "ring %#jx", (uintptr_t)xr, 0, 0, 0);
err = usb_allocmem(&sc->sc_bus, size, align, &xr->xr_dma);
- if (err)
+ if (err) {
+ kmem_free(xr, sizeof(struct xhci_ring));
+ DPRINTFN(1, "alloc xr_dma failed %jd", err, 0, 0, 0);
return err;
+ }
mutex_init(&xr->xr_lock, MUTEX_DEFAULT, IPL_SOFTUSB);
xr->xr_cookies = kmem_zalloc(sizeof(*xr->xr_cookies) * ntrb, KM_SLEEP);
xr->xr_trb = xhci_ring_trbv(xr, 0);
xr->xr_ntrb = ntrb;
xr->is_halted = false;
xhci_host_dequeue(xr);
+ *xrp = xr;
return USBD_NORMAL_COMPLETION;
}
static void
-xhci_ring_free(struct xhci_softc * const sc, struct xhci_ring * const xr)
+xhci_ring_free(struct xhci_softc * const sc, struct xhci_ring ** const xr)
{
- usb_freemem(&sc->sc_bus, &xr->xr_dma);
- mutex_destroy(&xr->xr_lock);
- kmem_free(xr->xr_cookies, sizeof(*xr->xr_cookies) * xr->xr_ntrb);
+ if (*xr == NULL)
+ return;
+
+ usb_freemem(&sc->sc_bus, &(*xr)->xr_dma);
+ mutex_destroy(&(*xr)->xr_lock);
+ kmem_free((*xr)->xr_cookies,
+ sizeof(*(*xr)->xr_cookies) * (*xr)->xr_ntrb);
+ kmem_free(*xr, sizeof(struct xhci_ring));
+ *xr = NULL;
}
static void
@@ -2631,7 +2674,7 @@ xhci_ring_put(struct xhci_softc * const
static void
xhci_abort_command(struct xhci_softc *sc)
{
- struct xhci_ring * const cr = &sc->sc_cr;
+ struct xhci_ring * const cr = sc->sc_cr;
uint64_t crcr;
int i;
@@ -2675,7 +2718,7 @@ static usbd_status
xhci_do_command_locked(struct xhci_softc * const sc,
struct xhci_soft_trb * const trb, int timeout)
{
- struct xhci_ring * const cr = &sc->sc_cr;
+ struct xhci_ring * const cr = sc->sc_cr;
usbd_status err;
XHCIHIST_FUNC();
@@ -2806,7 +2849,7 @@ xhci_disable_slot(struct xhci_softc * co
if (!err) {
xs = &sc->sc_slots[slot];
if (xs->xs_idx != 0) {
- xhci_free_slot(sc, xs, XHCI_DCI_SLOT + 1, 32);
+ xhci_free_slot(sc, xs);
xhci_set_dcba(sc, 0, slot);
memset(xs, 0, sizeof(*xs));
}
@@ -2908,7 +2951,6 @@ xhci_init_slot(struct usbd_device *dev,
struct xhci_softc * const sc = XHCI_BUS2SC(dev->ud_bus);
struct xhci_slot *xs;
usbd_status err;
- u_int dci;
XHCIHIST_FUNC();
XHCIHIST_CALLARGS("slot %ju", slot, 0, 0, 0);
@@ -2918,37 +2960,26 @@ xhci_init_slot(struct usbd_device *dev,
/* allocate contexts */
err = usb_allocmem(&sc->sc_bus, sc->sc_pgsz, sc->sc_pgsz,
&xs->xs_dc_dma);
- if (err)
+ if (err) {
+ DPRINTFN(1, "failed to allocmem output device context %jd",
+ err, 0, 0, 0);
return err;
+ }
memset(KERNADDR(&xs->xs_dc_dma, 0), 0, sc->sc_pgsz);
err = usb_allocmem(&sc->sc_bus, sc->sc_pgsz, sc->sc_pgsz,
&xs->xs_ic_dma);
- if (err)
+ if (err) {
+ DPRINTFN(1, "failed to allocmem input device context %jd",
+ err, 0, 0, 0);
goto bad1;
- memset(KERNADDR(&xs->xs_ic_dma, 0), 0, sc->sc_pgsz);
-
- for (dci = 0; dci < 32; dci++) {
- //CTASSERT(sizeof(xs->xs_ep[dci]) == sizeof(struct xhci_endpoint));
- memset(&xs->xs_ep[dci], 0, sizeof(xs->xs_ep[dci]));
- if (dci == XHCI_DCI_SLOT)
- continue;
- err = xhci_ring_init(sc, &xs->xs_ep[dci].xe_tr,
- XHCI_TRANSFER_RING_TRBS, XHCI_TRB_ALIGN);
- if (err) {
- DPRINTFN(0, "ring init failure", 0, 0, 0, 0);
- goto bad2;
- }
}
+ memset(KERNADDR(&xs->xs_ic_dma, 0), 0, sc->sc_pgsz);
- bad2:
- if (err == USBD_NORMAL_COMPLETION) {
- xs->xs_idx = slot;
- } else {
- xhci_free_slot(sc, xs, XHCI_DCI_SLOT + 1, dci);
- }
+ memset(&xs->xs_xr[0], 0, sizeof(xs->xs_xr));
+ xs->xs_idx = slot;
- return err;
+ return USBD_NORMAL_COMPLETION;
bad1:
usb_freemem(&sc->sc_bus, &xs->xs_dc_dma);
@@ -2957,18 +2988,17 @@ xhci_init_slot(struct usbd_device *dev,
}
static void
-xhci_free_slot(struct xhci_softc *sc, struct xhci_slot *xs, int start_dci,
- int end_dci)
+xhci_free_slot(struct xhci_softc *sc, struct xhci_slot *xs)
{
u_int dci;
XHCIHIST_FUNC();
- XHCIHIST_CALLARGS("slot %ju start %ju end %ju",
- xs->xs_idx, start_dci, end_dci, 0);
+ XHCIHIST_CALLARGS("slot %ju", xs->xs_idx, 0, 0, 0);
- for (dci = start_dci; dci < end_dci; dci++) {
- xhci_ring_free(sc, &xs->xs_ep[dci].xe_tr);
- memset(&xs->xs_ep[dci], 0, sizeof(xs->xs_ep[dci]));
+ /* deallocate all allocated rings in the slot */
+ for (dci = XHCI_DCI_SLOT; dci <= XHCI_MAX_DCI; dci++) {
+ if (xs->xs_xr[dci] != NULL)
+ xhci_ring_free(sc, &xs->xs_xr[dci]);
}
usb_freemem(&sc->sc_bus, &xs->xs_ic_dma);
usb_freemem(&sc->sc_bus, &xs->xs_dc_dma);
@@ -3109,7 +3139,7 @@ xhci_setup_ctx(struct usbd_pipe *pipe)
/* rewind TR dequeue pointer in xHC */
/* can't use xhci_ep_get_dci() yet? */
*(uint64_t *)(&cp[2]) = htole64(
- xhci_ring_trbp(&xs->xs_ep[dci].xe_tr, 0) |
+ xhci_ring_trbp(xs->xs_xr[dci], 0) |
XHCI_EPCTX_2_DCS_SET(1));
cp[0] = htole32(cp[0]);
@@ -3117,7 +3147,7 @@ xhci_setup_ctx(struct usbd_pipe *pipe)
cp[4] = htole32(cp[4]);
/* rewind TR dequeue pointer in driver */
- struct xhci_ring *xr = &xs->xs_ep[dci].xe_tr;
+ struct xhci_ring *xr = xs->xs_xr[dci];
mutex_enter(&xr->xr_lock);
xhci_host_dequeue(xr);
mutex_exit(&xr->xr_lock);
@@ -3811,7 +3841,7 @@ xhci_device_ctrl_start(struct usbd_xfer
struct xhci_softc * const sc = XHCI_XFER2SC(xfer);
struct xhci_slot * const xs = xfer->ux_pipe->up_dev->ud_hcpriv;
const u_int dci = xhci_ep_get_dci(xfer->ux_pipe->up_endpoint->ue_edesc);
- struct xhci_ring * const tr = &xs->xs_ep[dci].xe_tr;
+ struct xhci_ring * const tr = xs->xs_xr[dci];
struct xhci_xfer * const xx = XHCI_XFER2XXFER(xfer);
usb_device_request_t * const req = &xfer->ux_request;
const int isread = usbd_xfer_isread(xfer);
@@ -3948,7 +3978,7 @@ xhci_device_bulk_start(struct usbd_xfer
struct xhci_softc * const sc = XHCI_XFER2SC(xfer);
struct xhci_slot * const xs = xfer->ux_pipe->up_dev->ud_hcpriv;
const u_int dci = xhci_ep_get_dci(xfer->ux_pipe->up_endpoint->ue_edesc);
- struct xhci_ring * const tr = &xs->xs_ep[dci].xe_tr;
+ struct xhci_ring * const tr = xs->xs_xr[dci];
struct xhci_xfer * const xx = XHCI_XFER2XXFER(xfer);
const uint32_t len = xfer->ux_length;
usb_dma_t * const dma = &xfer->ux_dmabuf;
@@ -4069,7 +4099,7 @@ xhci_device_intr_start(struct usbd_xfer
struct xhci_softc * const sc = XHCI_XFER2SC(xfer);
struct xhci_slot * const xs = xfer->ux_pipe->up_dev->ud_hcpriv;
const u_int dci = xhci_ep_get_dci(xfer->ux_pipe->up_endpoint->ue_edesc);
- struct xhci_ring * const tr = &xs->xs_ep[dci].xe_tr;
+ struct xhci_ring * const tr = xs->xs_xr[dci];
struct xhci_xfer * const xx = XHCI_XFER2XXFER(xfer);
const uint32_t len = xfer->ux_length;
const bool polling = xhci_polling_p(sc);
Home |
Main Index |
Thread Index |
Old Index