Subject: Re: new kpi proposal, sysdisk(9)
To: None <elad@NetBSD.org>
From: YAMAMOTO Takashi <yamt@mwd.biglobe.ne.jp>
List: tech-kern
Date: 01/07/2007 15:36:14
--NextPart-20070107152408-1761200
Content-Type: Text/Plain; charset=us-ascii
> > i meant, the framework should be designed so that it can track
> > which parts of disks are used by who, rather than hardcoding your policy.
>
> I won't comment on that, because that is a rather hypothetical
> statement... when it's finished, we'll get back to it. :)
how about something like the attached patch?
the "query" part is not implemented yet, but it shouldn't be too hard.
YAMAMOTO Takashi
--NextPart-20070107152408-1761200
Content-Type: Text/Plain; charset=us-ascii
Content-Disposition: attachment; filename="dk_use.diff"
Index: sys/disk.h
===================================================================
--- sys/disk.h (revision 1922)
+++ sys/disk.h (working copy)
@@ -444,9 +444,14 @@ struct disk {
struct cpu_disklabel *dk_cpulabel;
};
+struct diskuser;
+typedef struct diskuser diskuser_t;
+
struct dkdriver {
void (*d_strategy)(struct buf *);
void (*d_minphys)(struct buf *);
+ int (*d_use)(struct disk *, int, diskuser_t *);
+ int (*d_unuse)(struct disk *, int, diskuser_t *);
#ifdef notyet
int (*d_open)(dev_t, int, int, struct proc *);
int (*d_close)(dev_t, int, int, struct proc *);
@@ -466,6 +471,39 @@ struct dkdriver {
#define DK_OPENRAW 5 /* open without label */
/*
+ * disk usage tracking
+ */
+
+int diskuser_create(const char *, diskuser_t **);
+void diskuser_destroy(diskuser_t *);
+
+int disk_use(struct vnode *, diskuser_t *);
+int disk_unuse(struct vnode *, diskuser_t *);
+
+int disk_open(struct vnode *, int, kauth_cred_t cred, struct lwp *,
+ diskuser_t *);
+int disk_close(struct vnode *, int, kauth_cred_t cred, struct lwp *,
+ diskuser_t *);
+
+/*
+ * diskrange_t: represents a byte range (start offset and size) on a disk.
+ */
+
+typedef struct {
+ uint64_t r_start; /* in bytes */
+ uint64_t r_size; /* in bytes */
+} diskrange_t;
+
+/*
+ * helper functions for drivers
+ */
+
+int diskrange_use(struct disk *, const diskrange_t *, diskuser_t *);
+int diskrange_unuse(struct disk *, const diskrange_t *, diskuser_t *);
+int diskpartition_use(struct disk *, int, diskuser_t *);
+int diskpartition_unuse(struct disk *, int, diskuser_t *);
+
+/*
* Bad sector lists per fixed disk
*/
struct disk_badsectors {
Index: kern/subr_disk.c
===================================================================
--- kern/subr_disk.c (revision 1922)
+++ kern/subr_disk.c (working copy)
@@ -79,11 +79,15 @@ __KERNEL_RCSID(0, "$NetBSD: subr_disk.c,
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
+#include <sys/kmem.h>
#include <sys/buf.h>
+#include <sys/conf.h>
#include <sys/syslog.h>
#include <sys/disklabel.h>
#include <sys/disk.h>
#include <sys/sysctl.h>
+#include <sys/vnode.h>
+#include <miscfs/specfs/specdev.h>
#include <lib/libkern/libkern.h>
/*
@@ -453,3 +457,190 @@ disk_ioctl(struct disk *diskp, u_long cm
return (error);
}
+
+/*
+ * disk_find_by_dev: look up the struct disk behind a device number.
+ *
+ * => dev: device number of a block (VBLK) or character (VCHR) device.
+ * => type: vnode type of the device node; any other type panics.
+ * => dkp: on success, set to the disk returned by disk_find().
+ *
+ * returns 0 on success.  returns ENOENT if the character device has no
+ * block counterpart, the block major has no driver name, the generated
+ * "<name><unit>" string does not fit in the lookup buffer, or no disk is
+ * registered under that name.
+ */
+static int
+disk_find_by_dev(dev_t dev, enum vtype type, struct disk **dkp)
+{
+	dev_t bdev;
+	const char *name;
+	struct disk *dk;
+	char devname[16];
+	int len;
+
+	if (type == VCHR) {
+		bdev = devsw_chr2blk(dev);
+		if (bdev == NODEV) {
+			return ENOENT;
+		}
+	} else if (type == VBLK) {
+		bdev = dev;
+	} else {
+		panic("%s: type=%d", __func__, (int)type);
+	}
+	name = devsw_blk2name(major(bdev));
+	if (name == NULL) {
+		/* no driver name for this major; never hand NULL to "%s". */
+		return ENOENT;
+	}
+	len = snprintf(devname, sizeof(devname), "%s%d", name,
+	    DISKUNIT(bdev));
+	if (len < 0 || (size_t)len >= sizeof(devname)) {
+		/* a truncated name could match the wrong disk. */
+		return ENOENT;
+	}
+	dk = disk_find(devname);
+	if (dk == NULL) {
+		return ENOENT;
+	}
+	*dkp = dk;
+	return 0;
+}
+
+struct diskuser {
+ /*
+ * Identifies who is using (part of) a disk.  A pointer to this
+ * structure is handed to the use/unuse functions, and du_name is
+ * what diskrange_use()/diskrange_unuse() print.
+ *
+ * XXX probably we want to put more info here.
+ * XXX use prop list?
+ */
+ const char *du_name; /* name passed to diskuser_create(); pointer is stored, not copied */
+};
+
+/*
+ * diskrange_use: note that "du" starts using byte range "r" of disk "dk".
+ *
+ * for now this only logs the request and always returns 0.
+ */
+int
+diskrange_use(struct disk *dk, const diskrange_t *r,
+    diskuser_t *du)
+{
+	const uint64_t start = r->r_start;
+	const uint64_t size = r->r_size;
+	const char *user = du->du_name;
+
+	printf("%s: %s (start=%" PRIu64 ", size=%" PRIu64 ") by %s\n",
+	    __func__, dk->dk_name, start, size, user);
+
+	return 0;
+}
+
+/*
+ * diskrange_unuse: note that "du" stops using byte range "r" of disk "dk".
+ *
+ * for now this only logs the request and always returns 0.
+ */
+int
+diskrange_unuse(struct disk *dk, const diskrange_t *r,
+    diskuser_t *du)
+{
+	const uint64_t start = r->r_start;
+	const uint64_t size = r->r_size;
+	const char *user = du->du_name;
+
+	printf("%s: %s (start=%" PRIu64 ", size=%" PRIu64 ") by %s\n",
+	    __func__, dk->dk_name, start, size, user);
+
+	return 0;
+}
+
+/*
+ * diskpartition_getrange: fill "range" with the extent of partition "par"
+ * of disk "dk", taken from the disk's in-core label (dk_label).
+ *
+ * p_offset and p_size are shifted left by (dk_blkshift + DEV_BSHIFT) to
+ * produce the byte values that diskrange_t holds.
+ *
+ * "par" is used as an index into d_partitions without validation here.
+ */
+static void
+diskpartition_getrange(struct disk *dk, int par, diskrange_t *range)
+{
+	const struct partition *part = &dk->dk_label->d_partitions[par];
+	const int shift = dk->dk_blkshift + DEV_BSHIFT;
+	const uint64_t offset = part->p_offset;
+	const uint64_t size = part->p_size;
+
+	range->r_start = offset << shift;
+	range->r_size = size << shift;
+}
+
+/*
+ * diskpartition_use: helper for drivers; mark the whole of partition "par"
+ * on "dk" as used by "du".
+ *
+ * returns whatever diskrange_use() returns for the partition's range.
+ */
+int
+diskpartition_use(struct disk *dk, int par, diskuser_t *du)
+{
+	diskrange_t range;
+
+	/* translate the partition number into a byte range first. */
+	diskpartition_getrange(dk, par, &range);
+
+	return diskrange_use(dk, &range, du);
+}
+
+/*
+ * diskpartition_unuse: helper for drivers; mark the whole of partition
+ * "par" on "dk" as no longer used by "du".
+ *
+ * returns whatever diskrange_unuse() returns for the partition's range.
+ */
+int
+diskpartition_unuse(struct disk *dk, int par, diskuser_t *du)
+{
+	diskrange_t range;
+
+	/* translate the partition number into a byte range first. */
+	diskpartition_getrange(dk, par, &range);
+
+	return diskrange_unuse(dk, &range, du);
+}
+
+/*
+ * disk_use: record that "du" starts using the disk partition behind the
+ * device vnode "vp".
+ *
+ * the disk is looked up from vp->v_rdev and vp->v_type, and the partition
+ * number is DISKPART(v_rdev).  if the disk's driver provides a d_use hook
+ * it is called; otherwise diskpartition_use() is used.
+ *
+ * returns 0 on success, the lookup error if the disk cannot be found, or
+ * the error reported by the hook / diskpartition_use().  the latter must
+ * be propagated so that callers such as disk_open() can back out when
+ * the use is refused.
+ */
+int
+disk_use(struct vnode *vp, diskuser_t *du)
+{
+	struct disk *dk;
+	dev_t dev = vp->v_rdev;
+	int par;
+	int error;
+
+	error = disk_find_by_dev(dev, vp->v_type, &dk);
+	if (error) {
+		return error;
+	}
+	par = DISKPART(dev);
+	if (dk->dk_driver != NULL && dk->dk_driver->d_use != NULL) {
+		error = (*dk->dk_driver->d_use)(dk, par, du);
+	} else {
+		error = diskpartition_use(dk, par, du);
+	}
+	return error;
+}
+
+/*
+ * disk_unuse: record that "du" stops using the disk partition behind the
+ * device vnode "vp".
+ *
+ * the disk is looked up from vp->v_rdev and vp->v_type, and the partition
+ * number is DISKPART(v_rdev).  if the disk's driver provides a d_unuse
+ * hook it is called; otherwise diskpartition_unuse() is used.
+ *
+ * returns 0 on success, the lookup error if the disk cannot be found, or
+ * the error reported by the hook / diskpartition_unuse() instead of
+ * silently discarding it.
+ */
+int
+disk_unuse(struct vnode *vp, diskuser_t *du)
+{
+	struct disk *dk;
+	dev_t dev = vp->v_rdev;
+	int par;
+	int error;
+
+	error = disk_find_by_dev(dev, vp->v_type, &dk);
+	if (error) {
+		return error;
+	}
+	par = DISKPART(dev);
+	if (dk->dk_driver != NULL && dk->dk_driver->d_unuse != NULL) {
+		error = (*dk->dk_driver->d_unuse)(dk, par, du);
+	} else {
+		error = diskpartition_unuse(dk, par, du);
+	}
+	return error;
+}
+
+/*
+ * diskuser_create: allocate a diskuser named "name" and hand it back
+ * through "dup".
+ *
+ * the name pointer is stored as-is (the string is not copied).
+ * the allocation uses KM_SLEEP.  always returns 0.
+ */
+int
+diskuser_create(const char *name, diskuser_t **dup)
+{
+	diskuser_t * const newuser = kmem_alloc(sizeof(*newuser), KM_SLEEP);
+
+	newuser->du_name = name;
+	*dup = newuser;
+
+	return 0;
+}
+
+void
+diskuser_destroy(diskuser_t *du)
+{
+ /*
+ * Free a diskuser obtained from diskuser_create().  The size given
+ * to kmem_free() is the same sizeof(*du) that diskuser_create()
+ * passed to kmem_alloc().  The du_name string is not freed here.
+ */
+ kmem_free(du, sizeof(*du));
+}
+
+/*
+ * disk_open: VOP_OPEN() a vnode and, if it is a block or character
+ * device, register "du" as a user of the disk behind it.
+ *
+ * returns the VOP_OPEN() error if the open fails.  if disk_use() fails
+ * the vnode is closed again and the disk_use() error is returned.
+ * returns 0 otherwise.
+ */
+int
+disk_open(struct vnode *vp, int mode, kauth_cred_t cred, struct lwp *l,
+    diskuser_t *du)
+{
+	const int open_error = VOP_OPEN(vp, mode, cred, l);
+	int use_error;
+
+	if (open_error != 0) {
+		return open_error;
+	}
+	if (vp->v_type != VBLK && vp->v_type != VCHR) {
+		/* not a device node: nothing to track. */
+		return 0;
+	}
+	use_error = disk_use(vp, du);
+	if (use_error != 0) {
+		/* undo the open; the close result is deliberately unused. */
+		VOP_CLOSE(vp, mode, cred, l);
+	}
+	return use_error;
+}
+
+/*
+ * disk_close: counterpart of disk_open().  for a block or character
+ * device, drop "du"'s use of the disk, then VOP_CLOSE() the vnode.
+ *
+ * the close is always attempted.  a VOP_CLOSE() error takes precedence;
+ * otherwise the disk_unuse() result (0 for non-device vnodes) is returned.
+ */
+int
+disk_close(struct vnode *vp, int mode, kauth_cred_t cred, struct lwp *l,
+    diskuser_t *du)
+{
+	int unuse_error = 0;
+	int close_error;
+
+	switch (vp->v_type) {
+	case VBLK:
+	case VCHR:
+		unuse_error = disk_unuse(vp, du);
+		break;
+	default:
+		break;
+	}
+	close_error = VOP_CLOSE(vp, mode, cred, l);
+
+	return (close_error != 0) ? close_error : unuse_error;
+}
Index: uvm/uvm_swap.c
===================================================================
--- uvm/uvm_swap.c (revision 1934)
+++ uvm/uvm_swap.c (working copy)
@@ -44,6 +44,7 @@ __KERNEL_RCSID(0, "$NetBSD: uvm_swap.c,v
#include <sys/buf.h>
#include <sys/bufq.h>
#include <sys/conf.h>
+#include <sys/disk.h>
#include <sys/proc.h>
#include <sys/namei.h>
#include <sys/disklabel.h>
@@ -145,6 +146,7 @@ struct swapdev {
int swd_maxactive; /* max active i/o reqs */
struct bufq_state *swd_tab; /* buffer list */
int swd_active; /* number of active buffers */
+ diskuser_t *swd_diskuser;
};
/*
@@ -775,6 +777,7 @@ swap_on(struct lwp *l, struct swapdev *s
#endif /* NFS */
const struct bdevsw *bdev;
dev_t dev;
+ diskuser_t *du;
UVMHIST_FUNC("swap_on"); UVMHIST_CALLED(pdhist);
/*
@@ -793,10 +796,25 @@ swap_on(struct lwp *l, struct swapdev *s
* we skip the open/close for root on swap because the root
* has already been opened when root was mounted (mountroot).
*/
+
+ error = diskuser_create("swap", &du);
+ if (error) {
+ return error;
+ }
if (vp != rootvp) {
- if ((error = VOP_OPEN(vp, FREAD|FWRITE, l->l_cred, l)))
- return (error);
+ error = disk_open(vp, FREAD|FWRITE, l->l_cred, l, du);
+ if (error) {
+ diskuser_destroy(du);
+ return error;
+ }
+ } else {
+ error = disk_use(vp, du);
+ if (error) {
+ diskuser_destroy(du);
+ return error;
+ }
}
+ sdp->swd_diskuser = du;
/* XXX this only works for block devices */
UVMHIST_LOG(pdhist, " dev=%d, major(dev)=%d", dev, major(dev), 0,0);
@@ -964,8 +982,11 @@ bad:
blist_destroy(sdp->swd_blist);
}
if (vp != rootvp) {
- (void)VOP_CLOSE(vp, FREAD|FWRITE, l->l_cred, l);
+ (void)disk_close(vp, FREAD|FWRITE, l->l_cred, l, du);
+ } else {
+ disk_unuse(vp, du);
}
+ diskuser_destroy(du);
return (error);
}
@@ -977,6 +998,7 @@ bad:
static int
swap_off(struct lwp *l, struct swapdev *sdp)
{
+ diskuser_t *du;
int npages = sdp->swd_npages;
int error = 0;
@@ -1019,9 +1041,13 @@ swap_off(struct lwp *l, struct swapdev *
* so that spec_close() can tell if this is the last close.
*/
vrele(sdp->swd_vp);
+ du = sdp->swd_diskuser;
if (sdp->swd_vp != rootvp) {
- (void) VOP_CLOSE(sdp->swd_vp, FREAD|FWRITE, l->l_cred, l);
+ (void) disk_close(sdp->swd_vp, FREAD|FWRITE, l->l_cred, l, du);
+ } else {
+ disk_unuse(sdp->swd_vp, du);
}
+ diskuser_destroy(du);
simple_lock(&uvm.swap_data_lock);
uvmexp.swpages -= npages;
--NextPart-20070107152408-1761200--