Subject: support for mmap'ing disk block devices
To: None <tech-kern@NetBSD.ORG>
From: Jason Thorpe <thorpej@nas.nasa.gov>
List: tech-kern
Date: 06/28/1998 21:53:11
Hi folks...
So, I bit the bullet and added support for mmap'ing disk block devices
today. This is a cool feature that Solaris, for example, already has, and
that database applications (and, apparently, INN) like to use.
I added it to both Mach VM and UVM, since the changes were so trivial.
I've tested it under UVM with a SCSI disk. For some reason, it does
NOT work with vnds; I don't know why yet, but when I tried it, the kernel
appeared to jump through a NULL function pointer. I suspect Deep Evil
somewhere else.
Anyhow, I'd like to commit these changes RSN, i.e. within the next couple of days.
Objections?
Jason R. Thorpe thorpej@nas.nasa.gov
NASA Ames Research Center Home: +1 408 866 1912
NAS: M/S 258-5 Work: +1 650 604 0935
Moffett Field, CA 94035 Pager: +1 650 428 6939
Index: uvm/uvm_mmap.c
===================================================================
RCS file: /cvsroot/src/sys/uvm/uvm_mmap.c,v
retrieving revision 1.10
diff -c -r1.10 uvm_mmap.c
*** uvm_mmap.c 1998/05/30 22:21:03 1.10
--- uvm_mmap.c 1998/06/29 05:00:36
***************
*** 302,309 ****
return (ENODEV); /* only mmap vnodes! */
vp = (struct vnode *)fp->f_data; /* convert to vnode */
! if (vp->v_type != VREG && vp->v_type != VCHR)
! return (ENODEV); /* only REG/CHR support mmap */
/* special case: catch SunOS style /dev/zero */
if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
--- 302,310 ----
return (ENODEV); /* only mmap vnodes! */
vp = (struct vnode *)fp->f_data; /* convert to vnode */
! if (vp->v_type != VREG && vp->v_type != VCHR &&
! vp->v_type != VBLK)
! return (ENODEV); /* only REG/CHR/BLK support mmap */
/* special case: catch SunOS style /dev/zero */
if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
***************
*** 907,913 ****
}
if (uobj == NULL)
! return((vp->v_type == VCHR) ? EINVAL : ENOMEM);
if ((flags & MAP_SHARED) == 0)
uvmflag |= UVM_FLAG_COPYONW;
--- 908,914 ----
}
if (uobj == NULL)
! return((vp->v_type == VREG) ? ENOMEM : EINVAL);
if ((flags & MAP_SHARED) == 0)
uvmflag |= UVM_FLAG_COPYONW;
Index: uvm/uvm_vnode.c
===================================================================
RCS file: /cvsroot/src/sys/uvm/uvm_vnode.c,v
retrieving revision 1.12
diff -c -r1.12 uvm_vnode.c
*** uvm_vnode.c 1998/06/24 20:58:49 1.12
--- uvm_vnode.c 1998/06/29 05:00:38
***************
*** 61,66 ****
--- 61,72 ----
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/vnode.h>
+ #include <sys/disklabel.h>
+ #include <sys/ioctl.h>
+ #include <sys/fcntl.h>
+ #include <sys/conf.h>
+
+ #include <miscfs/specfs/specdev.h>
#include <vm/vm.h>
#include <vm/vm_page.h>
***************
*** 173,183 ****
--- 179,192 ----
struct uvm_vnode *uvn = &vp->v_uvm;
struct vattr vattr;
int oldflags, result;
+ struct partinfo pi;
u_quad_t used_vnode_size;
UVMHIST_FUNC("uvn_attach"); UVMHIST_CALLED(maphist);
UVMHIST_LOG(maphist, "(vn=0x%x)", arg,0,0,0);
+ used_vnode_size = (u_quad_t)0; /* XXX gcc -Wuninitialized */
+
/*
* first get a lock on the uvn.
*/
***************
*** 192,197 ****
--- 201,215 ----
}
/*
+ * if we're mapping a BLK device, make sure it is a disk.
+ */
+ if (vp->v_type == VBLK && bdevsw[major(vp->v_rdev)].d_type != D_DISK) {
+ simple_unlock(&uvn->u_obj.vmobjlock); /* drop lock */
+ UVMHIST_LOG(maphist,"<- done (VBLK not D_DISK!)", 0,0,0,0);
+ return(NULL);
+ }
+
+ /*
* now we have lock and uvn must not be in a blocked state.
* first check to see if it is already active, in which case
* we can bump the reference count, check to see if we need to
***************
*** 235,253 ****
uvn->u_flags = UVM_VNODE_ALOCK;
simple_unlock(&uvn->u_obj.vmobjlock); /* drop lock in case we sleep */
/* XXX: curproc? */
- result = VOP_GETATTR(vp, &vattr, curproc->p_ucred, curproc);
! /*
! * make sure that the newsize fits within a vm_offset_t
! * XXX: need to revise addressing data types
! */
! used_vnode_size = vattr.va_size;
! if (used_vnode_size > (vm_offset_t) -PAGE_SIZE) {
! #ifdef DEBUG
! printf("uvn_attach: vn %p size truncated %qx->%x\n", vp,
! used_vnode_size, -PAGE_SIZE);
! #endif
! used_vnode_size = (vm_offset_t) -PAGE_SIZE;
}
/* relock object */
--- 253,279 ----
uvn->u_flags = UVM_VNODE_ALOCK;
simple_unlock(&uvn->u_obj.vmobjlock); /* drop lock in case we sleep */
/* XXX: curproc? */
! if (vp->v_type == VBLK) {
! /*
! * We could implement this as a specfs getattr call, but:
! *
! * (1) VOP_GETATTR() would get the file system
! * vnode operation, not the specfs operation.
! *
! * (2) All we want is the size, anyhow.
! */
! result = (*bdevsw[major(vp->v_rdev)].d_ioctl)(vp->v_rdev,
! DIOCGPART, (caddr_t)&pi, FREAD, curproc);
! if (result == 0) {
! /* XXX should remember blocksize */
! used_vnode_size = (u_quad_t)pi.disklab->d_secsize *
! (u_quad_t)pi.part->p_size;
! }
! } else {
! result = VOP_GETATTR(vp, &vattr, curproc->p_ucred, curproc);
! if (result == 0)
! used_vnode_size = vattr.va_size;
}
/* relock object */
***************
*** 261,267 ****
UVMHIST_LOG(maphist,"<- done (VOP_GETATTR FAILED!)", 0,0,0,0);
return(NULL);
}
!
/*
* now set up the uvn.
*/
--- 287,306 ----
UVMHIST_LOG(maphist,"<- done (VOP_GETATTR FAILED!)", 0,0,0,0);
return(NULL);
}
!
! /*
! * make sure that the newsize fits within a vm_offset_t
! * XXX: need to revise addressing data types
! */
! if (vp->v_type == VBLK) printf("used_vnode_size = %qu\n", used_vnode_size);
! if (used_vnode_size > (vm_offset_t) -PAGE_SIZE) {
! #ifdef DEBUG
! printf("uvn_attach: vn %p size truncated %qx->%x\n", vp,
! used_vnode_size, -PAGE_SIZE);
! #endif
! used_vnode_size = (vm_offset_t) -PAGE_SIZE;
! }
!
/*
* now set up the uvn.
*/
Index: vm/vm_mmap.c
===================================================================
RCS file: /cvsroot/src/sys/vm/vm_mmap.c,v
retrieving revision 1.58
diff -c -r1.58 vm_mmap.c
*** vm_mmap.c 1998/05/30 22:21:03 1.58
--- vm_mmap.c 1998/06/29 05:00:38
***************
*** 223,229 ****
if (fp->f_type != DTYPE_VNODE)
return (ENODEV);
vp = (struct vnode *)fp->f_data;
! if (vp->v_type != VREG && vp->v_type != VCHR)
return (ENODEV);
/*
* XXX hack to handle use of /dev/zero to map anon
--- 223,230 ----
if (fp->f_type != DTYPE_VNODE)
return (ENODEV);
vp = (struct vnode *)fp->f_data;
! if (vp->v_type != VREG && vp->v_type != VCHR &&
! vp->v_type != VBLK)
return (ENODEV);
/*
* XXX hack to handle use of /dev/zero to map anon
***************
*** 824,830 ****
goto out;
}
/*
! * A regular file
*/
else {
#ifdef DEBUG
--- 825,831 ----
goto out;
}
/*
! * A regular file or block special file
*/
else {
#ifdef DEBUG
Index: vm/vnode_pager.c
===================================================================
RCS file: /cvsroot/src/sys/vm/vnode_pager.c,v
retrieving revision 1.37
diff -c -r1.37 vnode_pager.c
*** vnode_pager.c 1998/06/24 20:58:49 1.37
--- vnode_pager.c 1998/06/29 05:00:38
***************
*** 55,63 ****
--- 55,69 ----
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/vnode.h>
+ #include <sys/disklabel.h>
+ #include <sys/ioctl.h>
+ #include <sys/fcntl.h>
+ #include <sys/conf.h>
#include <sys/uio.h>
#include <sys/mount.h>
+ #include <miscfs/specfs/specdev.h>
+
#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vnode_pager.h>
***************
*** 130,135 ****
--- 136,142 ----
vm_object_t object;
struct vattr vattr;
struct vnode *vp;
+ struct partinfo pi;
u_quad_t used_vnode_size;
struct proc *p = curproc; /* XXX */
***************
*** 137,142 ****
--- 144,150 ----
if (vpagerdebug & (VDB_FOLLOW|VDB_ALLOC))
printf("vnode_pager_alloc(%p, %lx, %x)\n", handle, size, prot);
#endif
+
/*
* Pageout to vnode, no can do yet.
*/
***************
*** 144,154 ****
return(NULL);
/*
* Vnodes keep a pointer to any associated pager so no need to
* lookup with vm_pager_lookup.
*/
- vp = (struct vnode *)handle;
pager = (vm_pager_t)vp->v_vmdata;
if (pager == NULL) {
/*
* Allocate pager structures
--- 152,169 ----
return(NULL);
/*
+ * If we're mapping a BLK device, make sure it's a disk.
+ */
+ vp = (struct vnode *)handle;
+ if (vp->v_type == VBLK && bdevsw[major(vp->v_rdev)].d_type != D_DISK)
+ return (NULL);
+
+ /*
* Vnodes keep a pointer to any associated pager so no need to
* lookup with vm_pager_lookup.
*/
pager = (vm_pager_t)vp->v_vmdata;
+
if (pager == NULL) {
/*
* Allocate pager structures
***************
*** 162,177 ****
return(NULL);
}
/*
! * And an object of the appropriate size
*/
! if (VOP_GETATTR(vp, &vattr, p->p_ucred, p) != 0) {
! free((caddr_t)vnp, M_VMPGDATA);
! free((caddr_t)pager, M_VMPAGER);
! return(NULL);
}
/* make sure mapping fits into numeric range,
truncate if necessary */
- used_vnode_size = vattr.va_size;
if (used_vnode_size > (vm_offset_t)-PAGE_SIZE) {
#ifdef DEBUG
printf("vnode_pager_alloc: vn %p size truncated %qx->%lx\n",
--- 177,213 ----
return(NULL);
}
/*
! * And an object of the appropriate size.
*/
! if (vp->v_type == VBLK) {
! /*
! * We could implement this as a specfs getattr
! * call, but:
! *
! * (1) VOP_GETATTR() would get the file system
! * vnode operation, not the specfs operation.
! *
! * (2) All we want is the size, anyhow.
! */
! if ((*bdevsw[major(vp->v_rdev)].d_ioctl)(vp->v_rdev,
! DIOCGPART, (caddr_t)&pi, FREAD, p) != 0) {
! free((caddr_t)vnp, M_VMPGDATA);
! free((caddr_t)pager, M_VMPAGER);
! return(NULL);
! }
! /* XXX should remember blocksize */
! used_vnode_size = (u_quad_t)pi.disklab->d_secsize *
! (u_quad_t)pi.part->p_size;
! } else {
! if (VOP_GETATTR(vp, &vattr, p->p_ucred, p) != 0) {
! free((caddr_t)vnp, M_VMPGDATA);
! free((caddr_t)pager, M_VMPAGER);
! return(NULL);
! }
! used_vnode_size = vattr.va_size;
}
/* make sure mapping fits into numeric range,
truncate if necessary */
if (used_vnode_size > (vm_offset_t)-PAGE_SIZE) {
#ifdef DEBUG
printf("vnode_pager_alloc: vn %p size truncated %qx->%lx\n",