Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src/sys/arch/sparc64 many improvements:
details: https://anonhg.NetBSD.org/src/rev/9597f99f3e66
branches: trunk
changeset: 536798:9597f99f3e66
user: chs <chs%NetBSD.org@localhost>
date: Sun Sep 22 07:19:43 2002 +0000
description:
many improvements:
- use struct vm_page_md for attaching pv entries to struct vm_page
- change pseg_set()'s return value to indicate whether the spare page
was used as an L2 or L3 PTP.
- use a pool for pv entries instead of malloc().
- put PTPs on a list attached to the pmap so we can free them
more efficiently (by just walking the list) in pmap_destroy().
- use the new pmap_remove_all() interface to avoid flushing the cache and TLB
for each pmap_remove() that's done as we are tearing down an address space.
- in pmap_enter(), handle replacing an existing mapping more efficiently
than just calling pmap_remove() on it. also, skip flushing the
TSB and TLB if there was no previous mapping, since there can't be
anything we need to flush. also, preload the TSB if we're pre-setting
the mod/ref bits.
- allocate hardware contexts like the MIPS pmap:
allocate them all sequentially without reuse, then once we run out
just invalidate all user TLB entries and flush the entire L1 dcache.
- fix pmap_extract() for the case where the va is not page-aligned and
nothing is mapped there.
- fix calculation of TSB size. it was comparing physmem (which is
in units of pages) to constants that only make sense if they are
in units of bytes.
- avoid sleeping in pmap_enter(), instead let the caller do it.
- use pmap_kenter_pa() instead of pmap_enter() where appropriate.
- remove code to handle impossible cases in various functions.
- tweak asm code to pipeline a little better.
- remove many unnecessary spls and membars.
- lots of code cleanup.
- no doubt other stuff that I've forgotten.
the result of all this is that a fork+exit microbenchmark is 34% faster
and a fork+exec+exit microbenchmark is 28% faster.
diffstat:
sys/arch/sparc64/dev/iommu.c | 275 +--
sys/arch/sparc64/include/pmap.h | 70 +-
sys/arch/sparc64/include/pte.h | 9 +-
sys/arch/sparc64/include/vmparam.h | 28 +-
sys/arch/sparc64/sparc64/cache.h | 5 +-
sys/arch/sparc64/sparc64/cpu.c | 16 +-
sys/arch/sparc64/sparc64/locore.s | 370 +++-
sys/arch/sparc64/sparc64/machdep.c | 68 +-
sys/arch/sparc64/sparc64/mem.c | 4 +-
sys/arch/sparc64/sparc64/pmap.c | 2384 ++++++++++++++------------------
sys/arch/sparc64/sparc64/trap.c | 366 ++--
sys/arch/sparc64/sparc64/vm_machdep.c | 25 +-
12 files changed, 1698 insertions(+), 1922 deletions(-)
diffs (truncated from 6257 to 300 lines):
diff -r 9fa6c1a624d0 -r 9597f99f3e66 sys/arch/sparc64/dev/iommu.c
--- a/sys/arch/sparc64/dev/iommu.c Sun Sep 22 07:17:08 2002 +0000
+++ b/sys/arch/sparc64/dev/iommu.c Sun Sep 22 07:19:43 2002 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: iommu.c,v 1.57 2002/08/29 04:43:43 chs Exp $ */
+/* $NetBSD: iommu.c,v 1.58 2002/09/22 07:19:43 chs Exp $ */
/*
* Copyright (c) 2001, 2002 Eduardo Horvath
@@ -87,8 +87,8 @@
psize_t size;
vaddr_t va;
paddr_t pa;
- struct vm_page *m;
- struct pglist mlist;
+ struct vm_page *pg;
+ struct pglist pglist;
/*
* Setup the iommu.
@@ -121,9 +121,9 @@
* contiguous.
*/
- size = NBPG<<(is->is_tsbsize);
+ size = NBPG << is->is_tsbsize;
if (uvm_pglistalloc((psize_t)size, (paddr_t)0, (paddr_t)-1,
- (paddr_t)NBPG, (paddr_t)0, &mlist, 1, 0) != 0)
+ (paddr_t)NBPG, (paddr_t)0, &pglist, 1, 0) != 0)
panic("iommu_init: no memory");
va = uvm_km_valloc(kernel_map, size);
@@ -131,19 +131,16 @@
panic("iommu_init: no memory");
is->is_tsb = (int64_t *)va;
- m = TAILQ_FIRST(&mlist);
- is->is_ptsb = VM_PAGE_TO_PHYS(m);
+ is->is_ptsb = VM_PAGE_TO_PHYS(TAILQ_FIRST(&pglist));
/* Map the pages */
- for (; m != NULL; m = TAILQ_NEXT(m,pageq)) {
- pa = VM_PAGE_TO_PHYS(m);
- pmap_enter(pmap_kernel(), va, pa | PMAP_NVC,
- VM_PROT_READ|VM_PROT_WRITE,
- VM_PROT_READ|VM_PROT_WRITE|PMAP_WIRED);
+ TAILQ_FOREACH(pg, &pglist, pageq) {
+ pa = VM_PAGE_TO_PHYS(pg);
+ pmap_kenter_pa(va, pa | PMAP_NVC, VM_PROT_READ | VM_PROT_WRITE);
va += NBPG;
}
pmap_update(pmap_kernel());
- bzero(is->is_tsb, size);
+ memset(is->is_tsb, 0, size);
#ifdef DEBUG
if (iommudebug & IDB_INFO)
@@ -164,7 +161,7 @@
(unsigned long long)bus_space_read_8(is->is_bustag,
is->is_iommu,
offsetof (struct iommureg, iommu_tsb)));
- printf("TSB base %p phys %llx\n", (void *)is->is_tsb,
+ printf("TSB base %p phys %llx\n", (void *)is->is_tsb,
(unsigned long long)is->is_ptsb);
delay(1000000); /* 1 s */
}
@@ -178,10 +175,10 @@
/*
* Now all the hardware's working we need to allocate a dvma map.
*/
- printf("DVMA map: %x to %x\n",
+ printf("DVMA map: %x to %x\n",
(unsigned int)is->is_dvmabase,
(unsigned int)is->is_dvmaend);
- printf("IOTSB: %llx to %llx\n",
+ printf("IOTSB: %llx to %llx\n",
(unsigned long long)is->is_ptsb,
(unsigned long long)(is->is_ptsb + size));
is->is_dvmamap = extent_create(name,
@@ -202,26 +199,27 @@
struct strbuf_ctl *sb;
/* Need to do 64-bit stores */
- bus_space_write_8(is->is_bustag, is->is_iommu, IOMMUREG(iommu_tsb),
+ bus_space_write_8(is->is_bustag, is->is_iommu, IOMMUREG(iommu_tsb),
is->is_ptsb);
/* Enable IOMMU in diagnostic mode */
bus_space_write_8(is->is_bustag, is->is_iommu, IOMMUREG(iommu_cr),
is->is_cr|IOMMUCR_DE);
- for (i=0; i<2; i++) {
+ for (i = 0; i < 2; i++) {
if ((sb = is->is_sb[i])) {
/* Enable diagnostics mode? */
- bus_space_write_8(is->is_bustag, is->is_sb[i]->sb_sb,
+ bus_space_write_8(is->is_bustag, is->is_sb[i]->sb_sb,
STRBUFREG(strbuf_ctl), STRBUF_EN);
/* No streaming buffers? Disable them */
- if (bus_space_read_8(is->is_bustag,
- is->is_sb[i]->sb_sb,
+ if (bus_space_read_8(is->is_bustag,
+ is->is_sb[i]->sb_sb,
STRBUFREG(strbuf_ctl)) == 0) {
is->is_sb[i]->sb_flush = NULL;
} else {
+
/*
* locate the pa of the flush buffer.
*/
@@ -234,7 +232,7 @@
}
/*
- * Here are the iommu control routines.
+ * Here are the iommu control routines.
*/
void
iommu_enter(sb, va, pa, flags)
@@ -260,16 +258,16 @@
/* If we can't flush the strbuf don't enable it. */
strbuf = 0;
- tte = MAKEIOTTE(pa, !(flags & BUS_DMA_NOWRITE),
+ tte = MAKEIOTTE(pa, !(flags & BUS_DMA_NOWRITE),
!(flags & BUS_DMA_NOCACHE), (strbuf));
#ifdef DEBUG
tte |= (flags & 0xff000LL)<<(4*8);
#endif
-
- DPRINTF(IDB_IOMMU, ("Clearing TSB slot %d for va %p\n",
+
+ DPRINTF(IDB_IOMMU, ("Clearing TSB slot %d for va %p\n",
(int)IOTSBSLOT(va,is->is_tsbsize), (void *)(u_long)va));
is->is_tsb[IOTSBSLOT(va,is->is_tsbsize)] = tte;
- bus_space_write_8(is->is_bustag, is->is_iommu,
+ bus_space_write_8(is->is_bustag, is->is_iommu,
IOMMUREG(iommu_flush), va);
DPRINTF(IDB_IOMMU, ("iommu_enter: va %lx pa %lx TSB[%lx]@%p=%lx\n",
va, (long)pa, (u_long)IOTSBSLOT(va,is->is_tsbsize),
@@ -277,7 +275,6 @@
(u_long)tte));
}
-
/*
* Find the value of a DVMA address (debug routine).
*/
@@ -287,7 +284,7 @@
vaddr_t dva;
{
int64_t tte = 0;
-
+
if (dva >= is->is_dvmabase && dva < is->is_dvmaend)
tte = is->is_tsb[IOTSBSLOT(dva, is->is_tsbsize)];
@@ -314,9 +311,9 @@
if (va < is->is_dvmabase || va > is->is_dvmaend)
panic("iommu_remove: va 0x%lx not in DVMA space", (u_long)va);
if ((long)(va + len) < (long)va)
- panic("iommu_remove: va 0x%lx + len 0x%lx wraps",
+ panic("iommu_remove: va 0x%lx + len 0x%lx wraps",
(long) va, (long) len);
- if (len & ~0xfffffff)
+ if (len & ~0xfffffff)
panic("iommu_remove: ridiculous len 0x%lx", (u_long)len);
#endif
@@ -336,13 +333,13 @@
/* XXX Zero-ing the entry would not require RMW */
is->is_tsb[IOTSBSLOT(va,is->is_tsbsize)] &= ~IOTTE_V;
- bus_space_write_8(is->is_bustag, is->is_iommu,
+ bus_space_write_8(is->is_bustag, is->is_iommu,
IOMMUREG(iommu_flush), va);
va += NBPG;
}
}
-static int
+static int
iommu_strbuf_flush_done(sb)
struct strbuf_ctl *sb;
{
@@ -362,10 +359,10 @@
if (!sb->sb_flush)
return (0);
-
+
/*
* Streaming buffer flushes:
- *
+ *
* 1 Tell strbuf to flush by storing va to strbuf_pgflush. If
* we're not on a cache line boundary (64-bits):
* 2 Store 0 in flag
@@ -377,16 +374,16 @@
*/
*sb->sb_flush = 0;
- bus_space_write_8(is->is_bustag, sb->sb_sb,
+ bus_space_write_8(is->is_bustag, sb->sb_sb,
STRBUFREG(strbuf_flushsync), sb->sb_flushpa);
- microtime(&flushtimeout);
+ microtime(&flushtimeout);
cur = flushtimeout;
BUMPTIME(&flushtimeout, 500000); /* 1/2 sec */
-
+
DPRINTF(IDB_IOMMU, ("iommu_strbuf_flush_done: flush = %lx "
"at va = %lx pa = %lx now=%lx:%lx until = %lx:%lx\n",
- (long)*sb->sb_flush, (long)sb->sb_flush, (long)sb->sb_flushpa,
+ (long)*sb->sb_flush, (long)sb->sb_flush, (long)sb->sb_flushpa,
cur.tv_sec, cur.tv_usec,
flushtimeout.tv_sec, flushtimeout.tv_usec));
@@ -432,7 +429,7 @@
bus_size_t align, boundary;
vaddr_t vaddr = (vaddr_t)buf;
int seg;
- pmap_t pmap;
+ struct pmap *pmap;
if (map->dm_nsegs) {
/* Already in use?? */
@@ -441,11 +438,11 @@
#endif
bus_dmamap_unload(t, map);
}
+
/*
* Make sure that on error condition we return "no valid mappings".
*/
map->dm_nsegs = 0;
-
if (buflen > map->_dm_size) {
DPRINTF(IDB_BUSDMA,
("iommu_dvmamap_load(): error %d > %d -- "
@@ -462,26 +459,27 @@
if ((boundary = (map->dm_segs[0]._ds_boundary)) == 0)
boundary = map->_dm_boundary;
align = max(map->dm_segs[0]._ds_align, NBPG);
- s = splhigh();
- /*
- * If our segment size is larger than the boundary we need to
+
+ /*
+ * If our segment size is larger than the boundary we need to
* split the transfer up into little pieces ourselves.
*/
- err = extent_alloc(is->is_dvmamap, sgsize, align,
- (sgsize > boundary) ? 0 : boundary,
+ s = splhigh();
+ err = extent_alloc(is->is_dvmamap, sgsize, align,
+ (sgsize > boundary) ? 0 : boundary,
EX_NOWAIT|EX_BOUNDZERO, &dvmaddr);
splx(s);
#ifdef DEBUG
- if (err || (dvmaddr == (bus_addr_t)-1))
- {
+ if (err || (dvmaddr == (bus_addr_t)-1))
+ {
printf("iommu_dvmamap_load(): extent_alloc(%d, %x) failed!\n",
(int)sgsize, flags);
#ifdef DDB
Debugger();
#endif
- }
-#endif
+ }
+#endif
if (err != 0)
return (err);
@@ -506,7 +504,7 @@
/* Oops. We crossed a boundary. Split the xfer. */
DPRINTF(IDB_INFO, ("iommu_dvmamap_load: "
"seg %d start %lx size %lx\n", seg,
- (long)map->dm_segs[seg].ds_addr,
+ (long)map->dm_segs[seg].ds_addr,
map->dm_segs[seg].ds_len));
map->dm_segs[seg].ds_len =
boundary - (sgstart & (boundary - 1));
@@ -539,6 +537,7 @@
pmap = pmap_kernel();
for (; buflen > 0; ) {
+
/*
* Get the physical address for this page.
*/
@@ -561,7 +560,7 @@
(long)(curaddr & ~(NBPG-1))));
iommu_enter(sb, trunc_page(dvmaddr), trunc_page(curaddr),
Home |
Main Index |
Thread Index |
Old Index