
Network drivers and memory allocation in interrupt context



Hi,

I now have a semi-working pool_cache-based memory allocator for network 
drivers (tested with the iwn driver). I am uncertain, however, about how to 
get it fully working. The issue is memory allocation in interrupt context.

My first attempt was to use the palloc argument to pool_cache_init (see the 
mbuf_pool_poolpage_alloc function below). This works for a while but 
eventually panics in kmem_alloc, because kmem_alloc gets called in interrupt 
context (without the interrupt-context error return that is in the code now).
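
For reference, here is the pool_cache_init signature as I read it in 
sys/pool.h (the palloc argument hooks in the custom backing allocator; 
ctor, dtor and arg are what the second attempt described below uses):

pool_cache_t
pool_cache_init(size_t size, u_int align, u_int align_offset, int flags,
    const char *name, struct pool_allocator *palloc, int ipl,
    int (*ctor)(void *, void *, int), void (*dtor)(void *, void *),
    void *arg);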

The kmem_alloc man page suggests using pool_cache instead, so I implemented 
ctor/dtor methods to use in place of mbuf_pool_poolpage_alloc. This version 
panics almost immediately, however, in bus_dmamem_map, because the pool "pvpl" 
(created in arch/x86/x86/pmap.c) is initialized with IPL_NONE. This of course 
affects the mbuf_pool_poolpage_alloc version too, but it happens more quickly 
here because the ctor method gets called via pool_cache_get (i.e., there is no 
preallocation of the DMA buffers).

Adding the current error returns in mbuf_pool_poolpage_alloc and 
mbuf_pool_ctor avoids the panics, but it results in a non-working network 
driver.

I'm unsure how to proceed on this. Some thoughts:

1) Initialize the "pvpl" pool with IPL_VM.

2) Preallocate a large number of buffers in mbuf_pool_cache_init (see the 
sketch after this list).

3) Rewrite the network drivers so that they do not request memory in interrupt 
context.

4) Other.
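
Regarding option 2, here is a rough, untested sketch of the kind of 
preallocation I have in mind (the count would be a guess at the driver's 
worst-case demand):

static void
mbuf_pool_cache_prime(pool_cache_t pc, unsigned int count)
{
        void **objs;
        unsigned int i;

        /* Must run in thread context so the backing allocator works. */
        KASSERT(!cpu_intr_p() && !cpu_softintr_p());

        objs = kmem_alloc(count * sizeof(void *), KM_SLEEP);

        /* Allocate everything first so each get reaches the
           backing allocator instead of the CPU-local cache. */
        for (i = 0; i < count; i++)
                objs[i] = pool_cache_get(pc, PR_WAITOK);

        /* Put the objects back; they stay cached and can then
           satisfy interrupt-time pool_cache_get calls. */
        for (i = 0; i < count; i++) {
                if (objs[i] != NULL)
                        pool_cache_put(pc, objs[i]);
        }

        kmem_free(objs, count * sizeof(void *));
}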

Comments?

Thanks,
Sverre

Here's the current code:

/*

Mbuf_pool_cache_init sets up a DMA-safe pool_cache for the
specified bus and size. The pool_cache will use bus_dmamem_alloc
as its memory allocator. Mbuf_pool_cache_init may be called
multiple times for a given bus and size; subsequent calls
return the original pool_cache and increment a reference count.
Mbuf_pool_cache_init should be called from bus or device attach
methods as needed.

Mbuf_pool_cache_destroy should similarly be called from a bus or
device detach method.  The reference counter is used to destroy
the pool_cache when appropriate.

*/

#include <sys/types.h>
#include <sys/kmem.h>
#include <sys/pool.h>
#include <sys/bus.h>
#include <sys/cpu.h>

#define MBUF_POOL_POOLPAGE_ALLOC

/* The mbuf_pool_item list */
static TAILQ_HEAD(, mbuf_pool_item) mbuf_pool_head =
    TAILQ_HEAD_INITIALIZER(mbuf_pool_head);

struct mbuf_pool_item {
        TAILQ_ENTRY(mbuf_pool_item) mbuf_pool_list;
        bus_dma_tag_t mpi_bus_tag;
        unsigned int mpi_size;
        char *mpi_name;
        pool_cache_t mpi_pc;
        unsigned int mpi_refcnt;
};

struct mbuf_pool_extmem {
        bus_size_t em_size;
        bus_dma_segment_t em_seg;
        void *em_vaddr;
};

typedef struct mbuf_pool_extmem mbuf_pool_extmem_t;

static bool mbuf_pool_initialized = false;
static kmutex_t mbuf_pool_lock;

#ifdef MBUF_POOL_POOLPAGE_ALLOC
static struct pool_allocator mbuf_pool_allocator;
#endif

#define MBUF_POOL_TO_MPI(pool) ((struct mbuf_pool_item *)((pool)->pr_qcache))

struct mbuf_pool_item *
mbuf_pool_get_pool_item(pool_cache_t pc, bus_dma_tag_t tag,
    unsigned int size);

char *
mbuf_pool_get_pool_name(bus_dma_tag_t tag, unsigned int size);

pool_cache_t
mbuf_pool_cache_init(bus_dma_tag_t tag, unsigned int size);

void
mbuf_pool_cache_destroy(pool_cache_t pc);

void *
mbuf_pool_cache_get_paddr(pool_cache_t pc, int flags, paddr_t *pap);

void
mbuf_pool_cache_put_paddr(pool_cache_t pc, void *object, paddr_t pa);

#ifdef MBUF_POOL_POOLPAGE_ALLOC

/*
 * Custom pool alloc and free methods.
 */

static void *
mbuf_pool_poolpage_alloc(struct pool *pool, int prflags)
{
        void *poolpage;
        unsigned int poolpage_size = pool->pr_alloc->pa_pagesz;
        struct mbuf_pool_item *mpi;
        mbuf_pool_extmem_t *em;
        unsigned int size, em_count, i;
        int nsegs;
        int error;

        /* kmem_alloc cannot be used in interrupt context */
        if (cpu_intr_p() || cpu_softintr_p())
                return NULL;

        /* Verify assumptions that are made in the code, below */
        if (poolpage_size < sizeof(mbuf_pool_extmem_t) ||
            poolpage_size % sizeof(mbuf_pool_extmem_t) != 0)
                panic("mbuf_pool_poolpage_alloc: invalid struct 
mbuf_pool_extmem 
size");

        /* XXX Should this be KM_NOSLEEP? */
        /* kmem_alloc cannot be used in interrupt context; the man
           page says to use pool_cache instead -- but how? */
        poolpage = kmem_alloc(poolpage_size, KM_SLEEP);
        if (poolpage == NULL)
                goto fail1;

        mpi = MBUF_POOL_TO_MPI(pool);

        em_count = poolpage_size / sizeof(mbuf_pool_extmem_t);
        size = mpi->mpi_size;
        em = poolpage;

        for (i = 0; i < em_count; i++) {
                em->em_size = size;

                /* XXX verify alignment arg (size) */
                error = bus_dmamem_alloc(mpi->mpi_bus_tag, size,
                    size, 0, &em->em_seg, 1, &nsegs, BUS_DMA_NOWAIT);
                if (error != 0 || nsegs != 1)
                        goto fail2;

                error = bus_dmamem_map(mpi->mpi_bus_tag, &em->em_seg, 1,
                    size, &em->em_vaddr, BUS_DMA_WAITOK);
                if (error != 0 || em->em_vaddr == NULL)
                        goto fail3;

                em++;
        }

        return poolpage;
fail3:
        bus_dmamem_free(mpi->mpi_bus_tag, &em->em_seg, 1);
fail2:
        /* Unmap and free the segments that were fully set up. */
        while (--em >= (mbuf_pool_extmem_t *)poolpage) {
                bus_dmamem_unmap(mpi->mpi_bus_tag, em->em_vaddr,
                    em->em_size);
                bus_dmamem_free(mpi->mpi_bus_tag, &em->em_seg, 1);
        }
        kmem_free(poolpage, poolpage_size);
fail1:
        printf("iwn: mbuf_pool_poolpage_alloc failure\n");
        return NULL;
}

static void
mbuf_pool_poolpage_free(struct pool *pool, void *poolpage)
{
        unsigned int poolpage_size = pool->pr_alloc->pa_pagesz;
        struct mbuf_pool_item *mpi;
        mbuf_pool_extmem_t *em;
        unsigned int em_count, i;

        mpi = MBUF_POOL_TO_MPI(pool);

        em_count = poolpage_size / sizeof(mbuf_pool_extmem_t);
        em = poolpage;

        for (i = 0; i < em_count; i++) {
                bus_dmamem_unmap(mpi->mpi_bus_tag, em->em_vaddr,
                    em->em_size);
                bus_dmamem_free(mpi->mpi_bus_tag, &em->em_seg, 1);
                em++;
        }

        kmem_free(poolpage, poolpage_size);
}

#else

static int
mbuf_pool_ctor(void *arg, void *object, int flags)
{
        struct mbuf_pool_item *mpi = arg;
        mbuf_pool_extmem_t *em = object;
        unsigned int size;
        int nsegs;
        int error;

        /* bus_dmamem_map fails in interrupt context */
        if (cpu_intr_p() || cpu_softintr_p())
                return EBUSY;

        size = mpi->mpi_size;
        em->em_size = size;

        /* XXX verify alignment arg (size) */
        error = bus_dmamem_alloc(mpi->mpi_bus_tag, size,
            size, 0, &em->em_seg, 1, &nsegs, BUS_DMA_NOWAIT);
        if (error != 0 || nsegs != 1)
                goto fail1;

        /* XXX next call fails with ipl error in pool_get (pool pvpl) */
        error = bus_dmamem_map(mpi->mpi_bus_tag, &em->em_seg, 1,
            size, &em->em_vaddr, BUS_DMA_WAITOK);
        if (error != 0 || em->em_vaddr == NULL)
                goto fail2;

        return 0;
fail2:
        bus_dmamem_free(mpi->mpi_bus_tag, &em->em_seg, 1);
fail1:
printf("iwn: mbuf_pool_ctor failure\n");
        /* XXX need to return an error here */
        return (error != 0) ? error : ENOMEM;
}

static void
mbuf_pool_dtor(void *arg, void *object)
{
        struct mbuf_pool_item *mpi = arg;
        mbuf_pool_extmem_t *em = object;

        bus_dmamem_unmap(mpi->mpi_bus_tag, em->em_vaddr, em->em_size);
        bus_dmamem_free(mpi->mpi_bus_tag, &em->em_seg, 1);
}

#endif

/*
 * Return the mbuf_pool_item struct that matches pc or tag and size.
 * Must be called with mutex held.
 */

struct mbuf_pool_item *
mbuf_pool_get_pool_item(pool_cache_t pc, bus_dma_tag_t tag, unsigned int size)
{
        struct mbuf_pool_item *mpi = NULL, *mpi1;

        TAILQ_FOREACH(mpi1, &mbuf_pool_head, mbuf_pool_list) {
                if (mpi1->mpi_pc == pc ||
                    (mpi1->mpi_size == size && mpi1->mpi_bus_tag == tag)) {
                        mpi = mpi1;
                        break;
                }
        }

        return mpi;
}

#define MBUF_POOL_NAMELEN 32

char *
mbuf_pool_get_pool_name(bus_dma_tag_t tag, unsigned int size)
{
        char *name;

        /* The allocation size and the snprintf bound must match, and
           must leave room for the prefix plus the digits. */
        name = kmem_alloc(MBUF_POOL_NAMELEN, KM_SLEEP);
        snprintf(name, MBUF_POOL_NAMELEN, "iwn: test_cache_%u", size);
        return name;
}

pool_cache_t
mbuf_pool_cache_init(bus_dma_tag_t tag, unsigned int size)
{
        pool_cache_t pc = NULL;
        char *name;
        struct mbuf_pool_item *mpi;
        int pc_size = sizeof(struct mbuf_pool_extmem);
        int nmbclusters;

        if (! mbuf_pool_initialized) {
                /* XXX Racy code. Need a proper constructor? */
                /* XXX IPL_NONE implies: cannot use in
                   an interrupt handler. Verify! */
                mutex_init(&mbuf_pool_lock, MUTEX_DEFAULT, IPL_NONE);
                mbuf_pool_initialized = true;
        }

        mutex_enter(&mbuf_pool_lock);

        /* Protect by mutex in order to avoid race
           with mbuf_pool_cache_destroy */

        /* Existing mbuf_pool_cache? */
        mpi = mbuf_pool_get_pool_item(NULL, tag, size);

        if (mpi == NULL) {

                /* Create a new pool cache */

                mpi = kmem_alloc(sizeof(struct mbuf_pool_item), KM_SLEEP);
                if (mpi == NULL)
                        goto fail;

                mpi->mpi_bus_tag = tag;
                mpi->mpi_size = size;

                /* Pool caches must be named - make up a name. */
                name = mbuf_pool_get_pool_name(tag, size);
                mpi->mpi_name = name;

                /* Should we use IPL_NET instead of IPL_VM? */
#ifdef MBUF_POOL_POOLPAGE_ALLOC
                mbuf_pool_allocator.pa_alloc = &mbuf_pool_poolpage_alloc;
                mbuf_pool_allocator.pa_free = &mbuf_pool_poolpage_free;

                pc = pool_cache_init(pc_size, 0, 0, PR_NOALIGN|PR_NOTOUCH,
                    name, &mbuf_pool_allocator, IPL_VM, NULL, NULL, NULL);
#else
                pc = pool_cache_init(pc_size, 0, 0, PR_NOALIGN|PR_NOTOUCH,
                    name, NULL, IPL_VM, &mbuf_pool_ctor, &mbuf_pool_dtor, mpi);
#endif
printf("mbuf_pool_cache_init (%px): %d / %s (%d)\n", pc, size, name, pc_size);

                if (pc == NULL) {
                        kmem_free(name, MBUF_POOL_NAMELEN);
                        kmem_free(mpi, sizeof(struct mbuf_pool_item));
                        goto fail;
                }

                /*
                 * Set the hard limit on the pool to the number of
                 * mbuf clusters the kernel is to support.  Log the limit
                 * reached message max once a minute.
                 * XXX Sizing is wrong. Fix.
                 */
                nmbclusters = physmem * PAGE_SIZE / (4 * size);
#ifdef NMBCLUSTERS_MAX
                nmbclusters = MIN(nmbclusters, NMBCLUSTERS_MAX);
#endif
#ifdef NMBCLUSTERS
                nmbclusters = MIN(nmbclusters, NMBCLUSTERS);
#endif
                pool_cache_sethardlimit(pc, nmbclusters,
                   "WARNING: mbuf_pool_cache limit reached", 60);

                /* mpi is needed in mbuf_pool_poolpage_alloc/free */
                /* XXX is this OK? */
                pc->pc_pool.pr_qcache = mpi;

                mpi->mpi_pc = pc;
                mpi->mpi_refcnt = 1;

                /* Add the mbuf_pool_item to the mbuf pool item list. */
                TAILQ_INSERT_TAIL(&mbuf_pool_head, mpi, mbuf_pool_list);
        } else {
                /* Increment refcount and return the existing pool cache */
                mpi->mpi_refcnt++;
                pc = mpi->mpi_pc;
        }

fail:   mutex_exit(&mbuf_pool_lock);

        if (pc != NULL) {
                /* pool_cache_set_drain_hook(pc, m_reclaim, NULL); */
                /* pool_cache_sethardlimit(pc, nmbclusters,
                   mclpool_4k_warnmsg, 60); */
        }

        return pc;
}

void
mbuf_pool_cache_destroy(pool_cache_t pc)
{
        struct mbuf_pool_item *mpi;
        
        mutex_enter(&mbuf_pool_lock);

        mpi = mbuf_pool_get_pool_item(pc, NULL, 0);
        KASSERT(mpi != NULL);

        if (mpi->mpi_refcnt <= 1) {
                /* Pool cache is no longer needed */

                pool_cache_destroy(mpi->mpi_pc);
                TAILQ_REMOVE(&mbuf_pool_head, mpi, mbuf_pool_list);
                kmem_free(mpi->mpi_name, MBUF_POOL_NAMELEN);
                kmem_free(mpi, sizeof(struct mbuf_pool_item));
        } else {
                mpi->mpi_refcnt--;
        }

        if (TAILQ_EMPTY(&mbuf_pool_head)) {
                /* XXX Add code here that un-initializes
                   this object when appropriate. */
                /* XXX OK to destroy a held mutex? */
                /* XXX Racy code. */
                mutex_destroy(&mbuf_pool_lock);
                mbuf_pool_initialized = false;
        }

        if (mbuf_pool_initialized)
                mutex_exit(&mbuf_pool_lock);
}

/* XXX These methods may not be needed. Why not call
   the pool_cache methods instead? */
/* Perhaps implement OpenBSD's livelock solution? */

void *
mbuf_pool_cache_get_paddr(pool_cache_t pc, int flags, paddr_t *pap)
{
        return pool_cache_get_paddr(pc, flags, pap);
}

void
mbuf_pool_cache_put_paddr(pool_cache_t pc, void *object, paddr_t pa)
{
        pool_cache_put_paddr(pc, object, pa);
}

/*

Implement these as needed:

mbuf_pool_cache_get
mbuf_pool_cache_put
mbuf_pool_cache_destruct_object
mbuf_pool_cache_invalidate
mbuf_pool_cache_sethiwat
mbuf_pool_cache_setlowat
mbuf_pool_cache_sethardlimit

*/
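
For example, mbuf_pool_cache_sethiwat would presumably be another 
pass-through like the two methods above:

void
mbuf_pool_cache_sethiwat(pool_cache_t pc, int nitems)
{
        pool_cache_sethiwat(pc, nitems);
}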

