Source-Changes-D archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
Re: CVS commit: src/sys/dev/dm
I have changed cvs commit message for
> dm.h dm_target_stripe.c
to
Add multi device strip support written by Guillermo Amaral and reviewed by me.
On Oct,Saturday 23 2010, at 11:18 PM, Adam Hamsik wrote:
> Module Name: src
> Committed By: haad
> Date: Sat Oct 23 21:18:55 UTC 2010
>
> Modified Files:
> src/sys/dev/dm: device-mapper.c dm.h dm_target_stripe.c
> Added Files:
> src/sys/dev/dm/doc: locking.txt
>
> Log Message:
> Add old file describing locking schema used in dm driver.
>
>
> To generate a diff of this commit:
> cvs rdiff -u -r1.24 -r1.25 src/sys/dev/dm/device-mapper.c
> cvs rdiff -u -r1.18 -r1.19 src/sys/dev/dm/dm.h
> cvs rdiff -u -r1.10 -r1.11 src/sys/dev/dm/dm_target_stripe.c
> cvs rdiff -u -r0 -r1.1 src/sys/dev/dm/doc/locking.txt
>
> Please note that diffs are not public domain; they are subject to the
> copyright notices on the relevant files.
>
> Modified files:
>
> Index: src/sys/dev/dm/device-mapper.c
> diff -u src/sys/dev/dm/device-mapper.c:1.24
> src/sys/dev/dm/device-mapper.c:1.25
> --- src/sys/dev/dm/device-mapper.c:1.24 Sat Oct 9 12:56:06 2010
> +++ src/sys/dev/dm/device-mapper.c Sat Oct 23 21:18:54 2010
> @@ -1,4 +1,4 @@
> -/* $NetBSD: device-mapper.c,v 1.24 2010/10/09 12:56:06 haad Exp $ */
> +/* $NetBSD: device-mapper.c,v 1.25 2010/10/23 21:18:54 haad Exp $ */
>
> /*
> * Copyright (c) 2010 The NetBSD Foundation, Inc.
> @@ -350,7 +350,6 @@
> r = 0;
>
> aprint_debug("dmioctl called\n");
> -
> KASSERT(data != NULL);
>
> if (( r = disk_ioctl_switch(dev, cmd, data)) == ENOTTY) {
>
> Index: src/sys/dev/dm/dm.h
> diff -u src/sys/dev/dm/dm.h:1.18 src/sys/dev/dm/dm.h:1.19
> --- src/sys/dev/dm/dm.h:1.18 Tue May 18 15:10:41 2010
> +++ src/sys/dev/dm/dm.h Sat Oct 23 21:18:54 2010
> @@ -1,4 +1,4 @@
> -/* $NetBSD: dm.h,v 1.18 2010/05/18 15:10:41 haad Exp $ */
> +/* $NetBSD: dm.h,v 1.19 2010/10/23 21:18:54 haad Exp $ */
>
> /*
> * Copyright (c) 2008 The NetBSD Foundation, Inc.
> @@ -170,12 +170,23 @@
> typedef struct target_linear_config {
> dm_pdev_t *pdev;
> uint64_t offset;
> + TAILQ_ENTRY(target_linear_config) entries;
> } dm_target_linear_config_t;
>
> +/*
> + * Striping devices are stored in a linked list, this might be inefficient
> + * for more than 8 striping devices and can be changed to something more
> + * scalable.
> + * TODO: look for other options than linked list.
> + */
> +TAILQ_HEAD(target_linear_devs, target_linear_config);
> +
> +typedef struct target_linear_devs dm_target_linear_devs_t;
> +
> /* for stripe : */
> typedef struct target_stripe_config {
> -#define MAX_STRIPES 2
> - struct target_linear_config stripe_devs[MAX_STRIPES];
> +#define DM_STRIPE_DEV_OFFSET 2
> + struct target_linear_devs stripe_devs;
> uint8_t stripe_num;
> uint64_t stripe_chunksize;
> size_t params_len;
>
> Index: src/sys/dev/dm/dm_target_stripe.c
> diff -u src/sys/dev/dm/dm_target_stripe.c:1.10
> src/sys/dev/dm/dm_target_stripe.c:1.11
> --- src/sys/dev/dm/dm_target_stripe.c:1.10 Tue May 18 15:10:41 2010
> +++ src/sys/dev/dm/dm_target_stripe.c Sat Oct 23 21:18:54 2010
> @@ -1,4 +1,4 @@
> -/*$NetBSD: dm_target_stripe.c,v 1.10 2010/05/18 15:10:41 haad Exp $*/
> +/*$NetBSD: dm_target_stripe.c,v 1.11 2010/10/23 21:18:54 haad Exp $*/
>
> /*
> * Copyright (c) 2009 The NetBSD Foundation, Inc.
> @@ -102,6 +102,8 @@
>
> /*
> * Init function called from dm_table_load_ioctl.
> + * DM_STRIPE_DEV_OFFSET should always hold the index of the first
> device-offset
> + * pair in the parameters.
> * Example line sent to dm from lvm tools when using striped target.
> * start length striped #stripes chunk_size device1 offset1 ... deviceN
> offsetN
> * 0 65536 striped 2 512 /dev/hda 0 /dev/hdb 0
> @@ -109,9 +111,11 @@
> int
> dm_target_stripe_init(dm_dev_t * dmv, void **target_config, char *params)
> {
> + dm_target_linear_config_t *tlc;
> dm_target_stripe_config_t *tsc;
> size_t len;
> char **ap, *argv[10];
> + int strpc, strpi;
>
> if (params == NULL)
> return EINVAL;
> @@ -130,33 +134,34 @@
>
> printf("Stripe target init function called!!\n");
>
> - printf("Stripe target chunk size %s number of stripes %s\n", argv[1],
> argv[0]);
> - printf("Stripe target device name %s -- offset %s\n", argv[2], argv[3]);
> - printf("Stripe target device name %s -- offset %s\n", argv[4], argv[5]);
> + printf("Stripe target chunk size %s number of stripes %s\n",
> + argv[1], argv[0]);
>
> - if (atoi(argv[0]) > MAX_STRIPES)
> - return ENOTSUP;
> -
> - if ((tsc = kmem_alloc(sizeof(dm_target_stripe_config_t), KM_NOSLEEP))
> - == NULL)
> + if ((tsc = kmem_alloc(sizeof(*tsc), KM_NOSLEEP)) == NULL)
> return ENOMEM;
>
> - /* Insert dmp to global pdev list */
> - if ((tsc->stripe_devs[0].pdev = dm_pdev_insert(argv[2])) == NULL)
> - return ENOENT;
> -
> - /* Insert dmp to global pdev list */
> - if ((tsc->stripe_devs[1].pdev = dm_pdev_insert(argv[4])) == NULL)
> - return ENOENT;
> -
> - tsc->stripe_devs[0].offset = atoi(argv[3]);
> - tsc->stripe_devs[1].offset = atoi(argv[5]);
> + /* Initialize linked list for striping devices */
> + TAILQ_INIT(&tsc->stripe_devs);
>
> /* Save length of param string */
> tsc->params_len = len;
> tsc->stripe_chunksize = atoi(argv[1]);
> tsc->stripe_num = (uint8_t) atoi(argv[0]);
>
> + strpc = DM_STRIPE_DEV_OFFSET + (tsc->stripe_num * 2);
> + for (strpi = DM_STRIPE_DEV_OFFSET; strpi < strpc; strpi += 2) {
> + printf("Stripe target device name %s -- offset %s\n",
> + argv[strpi], argv[strpi+1]);
> +
> + tlc = kmem_alloc(sizeof(*tlc), KM_NOSLEEP);
> + if ((tlc->pdev = dm_pdev_insert(argv[strpi])) == NULL)
> + return ENOENT;
> + tlc->offset = atoi(argv[strpi+1]);
> +
> + /* Insert striping device to linked list. */
> + TAILQ_INSERT_TAIL(&tsc->stripe_devs, tlc, entries);
> + }
> +
> *target_config = tsc;
>
> dmv->dev_type = DM_STRIPE_DEV;
> @@ -167,18 +172,28 @@
> char *
> dm_target_stripe_status(void *target_config)
> {
> + dm_target_linear_config_t *tlc;
> dm_target_stripe_config_t *tsc;
> - char *params;
> + char *params, *tmp;
>
> tsc = target_config;
>
> if ((params = kmem_alloc(DM_MAX_PARAMS_SIZE, KM_SLEEP)) == NULL)
> return NULL;
>
> - snprintf(params, DM_MAX_PARAMS_SIZE, "%d %" PRIu64 " %s %" PRIu64 " %s
> %" PRIu64,
> - tsc->stripe_num, tsc->stripe_chunksize,
> - tsc->stripe_devs[0].pdev->name, tsc->stripe_devs[0].offset,
> - tsc->stripe_devs[1].pdev->name, tsc->stripe_devs[1].offset);
> + if ((tmp = kmem_alloc(DM_MAX_PARAMS_SIZE, KM_SLEEP)) == NULL)
> + return NULL;
> +
> + snprintf(params, DM_MAX_PARAMS_SIZE, "%d %" PRIu64,
> + tsc->stripe_num, tsc->stripe_chunksize);
> +
> + TAILQ_FOREACH(tlc, &tsc->stripe_devs, entries) {
> + snprintf(tmp, DM_MAX_PARAMS_SIZE, " %s %" PRIu64,
> + tlc->pdev->name, tlc->offset);
> + strcat(params, tmp);
> + }
> +
> + kmem_free(tmp, DM_MAX_PARAMS_SIZE);
>
> return params;
> }
> @@ -186,12 +201,13 @@
> int
> dm_target_stripe_strategy(dm_table_entry_t * table_en, struct buf * bp)
> {
> + dm_target_linear_config_t *tlc;
> dm_target_stripe_config_t *tsc;
> struct buf *nestbuf;
> uint64_t blkno, blkoff;
> uint64_t stripe, stripe_blknr;
> uint32_t stripe_off, stripe_rest, num_blks, issue_blks;
> - int stripe_devnr;
> + int i, stripe_devnr;
>
> tsc = table_en->target_config;
> if (tsc == NULL)
> @@ -224,9 +240,17 @@
>
> nestiobuf_setup(bp, nestbuf, blkoff, issue_blks * DEV_BSIZE);
> nestbuf->b_blkno = stripe_blknr * tsc->stripe_chunksize +
> stripe_off;
> - nestbuf->b_blkno += tsc->stripe_devs[stripe_devnr].offset;
>
> - VOP_STRATEGY(tsc->stripe_devs[stripe_devnr].pdev->pdev_vnode,
> nestbuf);
> + tlc = TAILQ_FIRST(&tsc->stripe_devs);
> + for (i = 0; i < stripe_devnr && tlc == NULL; i++)
> + tlc = TAILQ_NEXT(tlc, entries);
> +
> + /* by this point we should have an tlc */
> + KASSERT(tlc == NULL);
> +
> + nestbuf->b_blkno += tlc->offset;
> +
> + VOP_STRATEGY(tlc->pdev->pdev_vnode, nestbuf);
>
> blkno += issue_blks;
> blkoff += issue_blks * DEV_BSIZE;
> @@ -242,16 +266,17 @@
> int
> dm_target_stripe_sync(dm_table_entry_t * table_en)
> {
> - int cmd, err, i;
> + int cmd, err;
> dm_target_stripe_config_t *tsc;
> + dm_target_linear_config_t *tlc;
>
> tsc = table_en->target_config;
>
> err = 0;
> cmd = 1;
>
> - for (i = 0; i < tsc->stripe_num; i++) {
> - if ((err = VOP_IOCTL(tsc->stripe_devs[i].pdev->pdev_vnode,
> DIOCCACHESYNC,
> + TAILQ_FOREACH(tlc, &tsc->stripe_devs, entries) {
> + if ((err = VOP_IOCTL(tlc->pdev->pdev_vnode, DIOCCACHESYNC,
> &cmd, FREAD|FWRITE, kauth_cred_get())) != 0)
> return err;
> }
> @@ -264,19 +289,23 @@
> dm_target_stripe_destroy(dm_table_entry_t * table_en)
> {
> dm_target_stripe_config_t *tsc;
> + dm_target_linear_config_t *tlc;
>
> tsc = table_en->target_config;
>
> if (tsc == NULL)
> return 0;
>
> - dm_pdev_decr(tsc->stripe_devs[0].pdev);
> - dm_pdev_decr(tsc->stripe_devs[1].pdev);
> + while ((tlc = TAILQ_FIRST(&tsc->stripe_devs)) != NULL) {
> + TAILQ_REMOVE(&tsc->stripe_devs, tlc, entries);
> + dm_pdev_decr(tlc->pdev);
> + kmem_free(tlc, sizeof(*tlc));
> + }
>
> /* Unbusy target so we can unload it */
> dm_target_unbusy(table_en->target);
>
> - kmem_free(tsc, sizeof(dm_target_stripe_config_t));
> + kmem_free(tsc, sizeof(*tsc));
>
> table_en->target_config = NULL;
>
> @@ -287,6 +316,7 @@
> dm_target_stripe_deps(dm_table_entry_t * table_en, prop_array_t prop_array)
> {
> dm_target_stripe_config_t *tsc;
> + dm_target_linear_config_t *tlc;
> struct vattr va;
>
> int error;
> @@ -296,15 +326,12 @@
>
> tsc = table_en->target_config;
>
> - if ((error = VOP_GETATTR(tsc->stripe_devs[0].pdev->pdev_vnode, &va,
> curlwp->l_cred)) != 0)
> - return error;
> -
> - prop_array_add_uint64(prop_array, (uint64_t) va.va_rdev);
> + TAILQ_FOREACH(tlc, &tsc->stripe_devs, entries) {
> + if ((error = VOP_GETATTR(tlc->pdev->pdev_vnode, &va,
> curlwp->l_cred)) != 0)
> + return error;
>
> - if ((error = VOP_GETATTR(tsc->stripe_devs[1].pdev->pdev_vnode, &va,
> curlwp->l_cred)) != 0)
> - return error;
> -
> - prop_array_add_uint64(prop_array, (uint64_t) va.va_rdev);
> + prop_array_add_uint64(prop_array, (uint64_t) va.va_rdev);
> + }
>
> return 0;
> }
>
> Added files:
>
> Index: src/sys/dev/dm/doc/locking.txt
> diff -u /dev/null src/sys/dev/dm/doc/locking.txt:1.1
> --- /dev/null Sat Oct 23 21:18:55 2010
> +++ src/sys/dev/dm/doc/locking.txt Sat Oct 23 21:18:55 2010
> @@ -0,0 +1,263 @@
> +
> + Device-mapper Locking architecture
> +
> +Overview
> +
> +There are 2 users in device-mapper driver
> + a) Users who uses disk drives
> + b) Users who uses ioctl management interface
> +
> +Management is done by dm_dev_*_ioctl and dm_table_*_ioctl routines. There
> are
> +two major structures used in these routines/device-mapper.
> +
> +Table entry:
> +
> +typedef struct dm_table_entry {
> + struct dm_dev *dm_dev; /* backlink */
> + uint64_t start;
> + uint64_t length;
> +
> + struct dm_target *target; /* Link to table target. */
> + void *target_config; /* Target specific data. */
> + SLIST_ENTRY(dm_table_entry) next;
> +} dm_table_entry_t;
> +
> +This structure stores every target part of dm device. Every device can have
> +more than one target mapping entries stored in a list. This structure
> describe
> +mapping between logical/physical blocks in dm device.
> +
> +start length target block device offset
> +0 102400 linear /dev/wd1a 384
> +102400 204800 linear /dev/wd2a 384
> +204800 409600 linear /dev/wd3a 384
> +
> +Every device has at least two tables ACTIVE and INACTIVE. Only ACTIVE table
> is
> +used during IO. Every IO operation on dm device have to walk through
> dm_table_entries list.
> +
> +Device entry:
> +
> +typedef struct dm_dev {
> + char name[DM_NAME_LEN];
> + char uuid[DM_UUID_LEN];
> +
> + int minor;
> + uint32_t flags; /* store communication protocol flags */
> +
> + kmutex_t dev_mtx; /* mutex for generall device lock */
> + kcondvar_t dev_cv; /* cv for ioctl synchronisation */
> +
> + uint32_t event_nr;
> + uint32_t ref_cnt;
> +
> + uint32_t dev_type;
> +
> + dm_table_head_t table_head;
> +
> + struct dm_dev_head upcalls;
> +
> + struct disklabel *dk_label; /* Disklabel for this table. */
> +
> + TAILQ_ENTRY(dm_dev) next_upcall; /* LIST of mirrored, snapshoted
> devices. */
> +
> + TAILQ_ENTRY(dm_dev) next_devlist; /* Major device list. */
> +} dm_dev_t;
> +
> +Every device created in dm device-mapper is represented with this structure.
> +All devices are stored in a list. Every ioctl routine have to work with this
> +structure.
> +
> + Locking in dm driver
> +
> +Locking must be done in two ways. Synchronisation between ioctl routines and
> +between IO operations and ioctl. Table entries are read during IO and during
> some ioctl routines. There are only few routines which manipulates table
> lists.
> +
> +Read access to table list:
> +
> +dmsize
> +dmstrategy
> +dm_dev_status_ioctl
> +dm_table_info_ioctl
> +dm_table_deps_ioctl
> +dm_disk_ioctl -> DIOCCACHESYNC ioctl
> +
> +Write access to table list:
> +dm_dev_remove_ioctl -> remove device from list, this routine have to
>
> + remove all tables.
> +dm_dev_resume_ioctl -> Switch tables on suspended device, switch
> INACTIVE
> + and ACTIVE tables.
> +dm_table_clear_ioctl -> Remove INACTIVE table from table list.
> +
> +
> +Synchronisation between readers and writers in table list
> +
> +I moved everything needed for table synchronisation to struct dm_table_head.
> +
> +typedef struct dm_table_head {
> + /* Current active table is selected with this. */
> + int cur_active_table;
> + struct dm_table tables[2];
> +
> + kmutex_t table_mtx;
> + kcondvar_t table_cv; /*IO waiting cv */
> +
> + uint32_t io_cnt;
> +} dm_table_head_t;
> +
> +dm_table_head_t is used as entry for every dm_table synchronisation routine.
> +
> +Because every table user have to get list to table list head I have
> implemented
> +these routines to manage access to table lists.
> +
> +/*
>
> + * Destroy all table data. This function can run when there are no
>
> + * readers on table lists.
>
> + */
> +int dm_table_destroy(dm_table_head_t *, uint8_t);
> +
> +/*
>
> + * Return length of active table in device.
>
> + */
> +uint64_t dm_table_size(dm_table_head_t *);
> +
> +/*
>
> + * Return current active table to caller, increment io_cnt reference
> counter.
> + */
> +struct dm_table * dm_table_get_entry(dm_table_head_t *, uint8_t);
> +
> +/*
>
> + * Return > 0 if table is at least one table entry (returns number of
> entries)
> + * and return 0 if there is not. Target count returned from this function
>
> + * doesn't need to be true when userspace user receive it (after return
>
> + * there can be dm_dev_resume_ioctl), therfore this isonly informative.
>
> + */
> +int dm_table_get_target_count(dm_table_head_t *, uint8_t);
> +
> +/*
>
> + * Decrement io reference counter and wake up all callers, with table_head
> cv.
> + */
> +void dm_table_release(dm_table_head_t *, uint8_t s);
> +
> +/*
>
> + * Switch table from inactive to active mode. Have to wait until io_cnt is
> 0.
> + */
> +void dm_table_switch_tables(dm_table_head_t *);
> +
> +/*
>
> + * Initialize table_head structures, I'm trying to keep this structure as
>
> + * opaque as possible.
>
> + */
> +void dm_table_head_init(dm_table_head_t *);
> +
> +/*
>
> + * Destroy all variables in table_head
>
> + */
> +void dm_table_head_destroy(dm_table_head_t *);
> +
> +Internal table synchronisation protocol
> +
> +Readers:
> +dm_table_size
> +dm_table_get_target_count
> +dm_table_get_target_count
> +
> +Readers with hold reference counter:
> +dm_table_get_entry
> +dm_table_release
> +
> +Writer:
> +dm_table_destroy
> +dm_table_switch_tables
> +
> +For managing synchronisation to table lists I use these routines. Every
> reader
> +uses dm_table_busy routine to hold reference counter during work and
> dm_table_unbusy for reference counter release. Every writer have to wait
> while
> +is reference counter 0 and only then it can work with device. It will sleep
> on
> +head->table_cv while there are other readers. dm_table_get_entry is specific
> in that it will return table with hold reference counter. After
> dm_table_get_entry
> +every caller must call dm_table_release when it doesn't want to work with
> it.
> +
> +/*
>
> + * Function to increment table user reference counter. Return id
>
> + * of table_id table.
>
> + * DM_TABLE_ACTIVE will return active table id.
>
> + * DM_TABLE_INACTIVE will return inactive table id.
>
> + */
> +static int
> +dm_table_busy(dm_table_head_t *head, uint8_t table_id)
> +{
> + uint8_t id;
> +
> + id = 0;
> +
> + mutex_enter(&head->table_mtx);
> +
> + if (table_id == DM_TABLE_ACTIVE)
> + id = head->cur_active_table;
> + else
> + id = 1 - head->cur_active_table;
> +
> + head->io_cnt++;
> +
> + mutex_exit(&head->table_mtx);
> + return id;
> +}
> +
> +/*
>
> + * Function release table lock and eventually wakeup all waiters.
>
> + */
> +static void
> +dm_table_unbusy(dm_table_head_t *head)
> +{
> + KASSERT(head->io_cnt != 0);
> +
> + mutex_enter(&head->table_mtx);
> +
> + if (--head->io_cnt == 0)
> + cv_broadcast(&head->table_cv);
> +
> + mutex_exit(&head->table_mtx);
> +}
> +
> +Device-mapper betwwen ioctl device synchronisation
> +
> +
> +Every ioctl user have to find dm_device with name, uuid, minor number.
> +For this dm_dev_lookup is used. This routine returns device with hold
> reference
> +counter.
> +
> +void
> +dm_dev_busy(dm_dev_t *dmv)
> +{
> + mutex_enter(&dmv->dev_mtx);
> + dmv->ref_cnt++;
> + mutex_exit(&dmv->dev_mtx);
> +}
> +
> +void
> +dm_dev_unbusy(dm_dev_t *dmv)
> +{
> + KASSERT(dmv->ref_cnt != 0);
> +
> + mutex_enter(&dmv->dev_mtx);
> + if (--dmv->ref_cnt == 0)
> + cv_broadcast(&dmv->dev_cv);
> + mutex_exit(&dmv->dev_mtx);
> +}
> +
> +Before returning from ioctl routine must release reference counter with
> +dm_dev_unbusy.
> +
> +dm_dev_remove_ioctl routine have to remove dm_dev from global device list,
> +and wait until all ioctl users from dm_dev are gone.
> +
> +
> +
> +
> +
> +
> +
> +
> +
> +
> +
> +
> +
> +
>
Regards
Adam.
Home |
Main Index |
Thread Index |
Old Index