Subject: Decoupling MAXPHYS and MAXBSIZE (includes patch)
To: None <tech-kern@netbsd.org>
From: Thor Lancelot Simon <tls@rek.tjls.com>
List: tech-kern
Date: 04/20/2003 13:28:03
The patch below replaces uses of MAXBSIZE with MAXPHYS in the places where
I believe MAXPHYS is the more correct constant. It's important for a few reasons:
1) It's a necessary first step towards per-device MAXPHYS.
2) It corrects the abstraction violation of using filesystem constants
in device drivers.
3) It allows clustering of file data to MAXPHYS on systems which need
to reduce MAXBSIZE in order to get more metadata buffers (e.g. for
directories on machines with very large numbers of directories,
such as nbanoncvs). Without this patch, such machines can be
tweaked to do only small I/O efficiently; they can't manage
both to cluster larger I/O efficiently and to cache all the
directories needed to do small I/O to many directories at
once.
4) It will allow increasing MAXPHYS on hardware that supports it (even
without #1 above) without burning a huge amount of KVA by increasing
MAXBSIZE.
I've tested this patch as follows:
1) With 64K MAXBSIZE and MAXPHYS (equivalent to the current default on
i386).
2) With 16K MAXBSIZE and 64K MAXPHYS (like nbanoncvs): there is no
filesystem corruption, and reads and writes are clustered to 64K.
3) With 16K MAXBSIZE and 63K MAXPHYS (like some ports that are limited
to a 64K-1 transfer count): there is no filesystem corruption, and
reads and writes are clustered to 32K. I believe that these should
be clustered to 48K (and would have been in the pre-UBC world AFAICT)
but a limitation in the clustering code seems to mean we now get
power-of-2 clusters only. Still, it's no worse than it was.
A few developers have looked this over already, but comments would be
greatly appreciated.
Index: dev/ic/aic7xxx.c
===================================================================
RCS file: /cvsroot/src/sys/dev/ic/aic7xxx.c,v
retrieving revision 1.97
diff -c -r1.97 aic7xxx.c
*** dev/ic/aic7xxx.c 2003/04/20 12:54:05 1.97
--- dev/ic/aic7xxx.c 2003/04/20 17:00:12
***************
*** 4358,4364 ****
next_scb->flags = SCB_FREE;
error = bus_dmamap_create(ahc->parent_dmat,
! AHC_MAXTRANSFER_SIZE, AHC_NSEG, MAXBSIZE, 0,
BUS_DMA_NOWAIT|BUS_DMA_ALLOCNOW,
&next_scb->dmamap);
if (error != 0)
--- 4358,4364 ----
next_scb->flags = SCB_FREE;
error = bus_dmamap_create(ahc->parent_dmat,
! AHC_MAXTRANSFER_SIZE, AHC_NSEG, MAXPHYS, 0,
BUS_DMA_NOWAIT|BUS_DMA_ALLOCNOW,
&next_scb->dmamap);
if (error != 0)
Index: dev/ic/mpt_netbsd.c
===================================================================
RCS file: /cvsroot/src/sys/dev/ic/mpt_netbsd.c,v
retrieving revision 1.4
diff -c -r1.4 mpt_netbsd.c
*** dev/ic/mpt_netbsd.c 2003/04/16 23:17:30 1.4
--- dev/ic/mpt_netbsd.c 2003/04/20 17:00:13
***************
*** 246,253 ****
req->sense_pbuf = (pptr - MPT_SENSE_SIZE);
req->sense_vbuf = (vptr - MPT_SENSE_SIZE);
! error = bus_dmamap_create(mpt->sc_dmat, MAXBSIZE,
! MPT_SGL_MAX, MAXBSIZE, 0, 0, &req->dmap);
if (error) {
aprint_error("%s: unable to create req %d DMA map, "
"error = %d\n", mpt->sc_dev.dv_xname, i, error);
--- 246,253 ----
req->sense_pbuf = (pptr - MPT_SENSE_SIZE);
req->sense_vbuf = (vptr - MPT_SENSE_SIZE);
! error = bus_dmamap_create(mpt->sc_dmat, MAXPHYS,
! MPT_SGL_MAX, MAXPHYS, 0, 0, &req->dmap);
if (error) {
aprint_error("%s: unable to create req %d DMA map, "
"error = %d\n", mpt->sc_dev.dv_xname, i, error);
Index: miscfs/genfs/genfs_vnops.c
===================================================================
RCS file: /cvsroot/src/sys/miscfs/genfs/genfs_vnops.c,v
retrieving revision 1.75
diff -c -r1.75 genfs_vnops.c
*** miscfs/genfs/genfs_vnops.c 2003/04/10 21:53:33 1.75
--- miscfs/genfs/genfs_vnops.c 2003/04/20 17:00:16
***************
*** 1035,1041 ****
off_t endoff = ap->a_offhi;
off_t off;
int flags = ap->a_flags;
! const int maxpages = MAXBSIZE >> PAGE_SHIFT;
int i, s, error, npages, nback;
int freeflag;
struct vm_page *pgs[maxpages], *pg, *nextpg, *tpg, curmp, endmp;
--- 1035,1042 ----
off_t endoff = ap->a_offhi;
off_t off;
int flags = ap->a_flags;
! /* Even for strange MAXPHYS, the shift rounds down to a page */
! const int maxpages = MAXPHYS >> PAGE_SHIFT;
int i, s, error, npages, nback;
int freeflag;
struct vm_page *pgs[maxpages], *pg, *nextpg, *tpg, curmp, endmp;
Index: ufs/ufs/ufs_bmap.c
===================================================================
RCS file: /cvsroot/src/sys/ufs/ufs/ufs_bmap.c,v
retrieving revision 1.21
diff -c -r1.21 ufs_bmap.c
*** ufs/ufs/ufs_bmap.c 2003/04/02 10:39:44 1.21
--- ufs/ufs/ufs_bmap.c 2003/04/20 17:00:17
***************
*** 136,142 ****
* don't create a block larger than the device can handle.
*/
*runp = 0;
! maxrun = MAXBSIZE / mp->mnt_stat.f_iosize - 1;
}
if (bn >= 0 && bn < NDADDR) {
--- 136,142 ----
* don't create a block larger than the device can handle.
*/
*runp = 0;
! maxrun = MAXPHYS / mp->mnt_stat.f_iosize - 1;
}
if (bn >= 0 && bn < NDADDR) {
Index: uvm/uvm_io.c
===================================================================
RCS file: /cvsroot/src/sys/uvm/uvm_io.c,v
retrieving revision 1.17
diff -c -r1.17 uvm_io.c
*** uvm/uvm_io.c 2001/11/10 07:37:00 1.17
--- uvm/uvm_io.c 2003/04/20 17:00:17
***************
*** 94,100 ****
togo = togo - (endva - VM_MAXUSER_ADDRESS + 1);
pageoffset = baseva & PAGE_MASK;
baseva = trunc_page(baseva);
! chunksz = MIN(round_page(togo + pageoffset), MAXBSIZE);
error = 0;
/*
--- 94,100 ----
togo = togo - (endva - VM_MAXUSER_ADDRESS + 1);
pageoffset = baseva & PAGE_MASK;
baseva = trunc_page(baseva);
! chunksz = MIN(round_page(togo + pageoffset), trunc_page(MAXPHYS));
error = 0;
/*
Index: uvm/uvm_map.c
===================================================================
RCS file: /cvsroot/src/sys/uvm/uvm_map.c,v
retrieving revision 1.136
diff -c -r1.136 uvm_map.c
*** uvm/uvm_map.c 2003/04/09 21:39:29 1.136
--- uvm/uvm_map.c 2003/04/20 17:00:19
***************
*** 2231,2236 ****
--- 2231,2237 ----
case MADV_NORMAL:
case MADV_RANDOM:
case MADV_SEQUENTIAL:
+ case MADV_WILLNEED:
/* nothing special here */
break;
Index: uvm/uvm_pager.c
===================================================================
RCS file: /cvsroot/src/sys/uvm/uvm_pager.c,v
retrieving revision 1.59
diff -c -r1.59 uvm_pager.c
*** uvm/uvm_pager.c 2002/11/09 20:09:52 1.59
--- uvm/uvm_pager.c 2003/04/20 17:00:20
***************
*** 95,101 ****
FALSE, NULL);
simple_lock_init(&pager_map_wanted_lock);
pager_map_wanted = FALSE;
! emergva = uvm_km_valloc(kernel_map, MAXBSIZE);
emerginuse = FALSE;
/*
--- 95,101 ----
FALSE, NULL);
simple_lock_init(&pager_map_wanted_lock);
pager_map_wanted = FALSE;
! emergva = uvm_km_valloc(kernel_map, round_page(MAXPHYS));
emerginuse = FALSE;
/*
***************
*** 162,168 ****
emerginuse = TRUE;
simple_unlock(&pager_map_wanted_lock);
kva = emergva;
! KASSERT(npages <= MAXBSIZE >> PAGE_SHIFT);
goto enter;
}
if ((flags & UVMPAGER_MAPIN_WAITOK) == 0) {
--- 162,169 ----
emerginuse = TRUE;
simple_unlock(&pager_map_wanted_lock);
kva = emergva;
! /* The shift implicitly truncates to PAGE_SIZE */
! KASSERT(npages <= (MAXPHYS >> PAGE_SHIFT));
goto enter;
}
if ((flags & UVMPAGER_MAPIN_WAITOK) == 0) {
Index: uvm/uvm_pdaemon.c
===================================================================
RCS file: /cvsroot/src/sys/uvm/uvm_pdaemon.c,v
retrieving revision 1.50
diff -c -r1.50 uvm_pdaemon.c
*** uvm/uvm_pdaemon.c 2003/02/25 00:22:20 1.50
--- uvm/uvm_pdaemon.c 2003/04/20 17:00:20
***************
*** 362,368 ****
struct vm_page *p, *nextpg = NULL; /* Quell compiler warning */
struct uvm_object *uobj;
struct vm_anon *anon;
! struct vm_page *swpps[MAXBSIZE >> PAGE_SHIFT];
struct simplelock *slock;
int swnpages, swcpages;
int swslot;
--- 362,368 ----
struct vm_page *p, *nextpg = NULL; /* Quell compiler warning */
struct uvm_object *uobj;
struct vm_anon *anon;
! struct vm_page *swpps[round_page(MAXPHYS) >> PAGE_SHIFT];
struct simplelock *slock;
int swnpages, swcpages;
int swslot;
***************
*** 621,627 ****
*/
if (swslot == 0) {
! swnpages = MAXBSIZE >> PAGE_SHIFT;
swslot = uvm_swap_alloc(&swnpages, TRUE);
if (swslot == 0) {
simple_unlock(slock);
--- 621,629 ----
*/
if (swslot == 0) {
! /* Even with strange MAXPHYS, the shift
! implicitly rounds down to a page. */
! swnpages = MAXPHYS >> PAGE_SHIFT;
swslot = uvm_swap_alloc(&swnpages, TRUE);
if (swslot == 0) {
simple_unlock(slock);