Subject: Re: Multiple faulting
To: None <tech-kern@netbsd.org>
From: Charles M. Hannum <root@ihack.net>
List: tech-kern
Date: 03/26/1999 00:11:25
There was a bug in my previous patch that caused copy-on-write pages
to usually not be cached after being write faulted.  The following
fixes that, as well as cleaning up a lot of other cruft regarding
pmap_nightmare().

It may be possible to optimize pmap_nightmare() slightly.  It should
only need to walk through and change the PTEs if the mappings were
already cacheable.  So we should be able to just stash a pointer to the
first pv_entry which is not for the one being updated, and check
whether its PTE is marked as cacheable.

This same logic could be used to opportunistically enable caching
after pmap_remove_pv() (perhaps flipping things around so that
pmap_nightmare() is actually called at the end of pmap_enter(), and
doesn't take a va, so that it's callable from both places).

-----8<-----snip-----8<-----snip-----8<-----snip-----8<-----snip-----8<-----
*** arch/arm32/arm32/pmap.c~	Thu Mar 25 04:46:31 1999
--- arch/arm32/arm32/pmap.c	Thu Mar 25 23:16:35 1999
***************
*** 167,170 ****
--- 167,172 ----
       vm_offset_t l2pa));
  
+ int mycroft_hack = 0;
+ 
  /* Function to set the debug level of the pmap code */
  
***************
*** 412,415 ****
--- 414,421 ----
  	    pv, pv->pv_va, pv->pv_pmap, pv->pv_next));
  
+ 	/* Update the wiring stats for the new page */
+ 	if (flags & PT_W)
+ 		++pmap->pm_stats.wired_count;
+ 
  	if (pv->pv_pmap == NULL) {
  		/*
***************
*** 447,451 ****
   */
  
! /* __inline*/ u_int
  pmap_remove_pv(pmap, va, pv)
  	pmap_t pmap;
--- 453,457 ----
   */
  
! /* __inline*/ void
  pmap_remove_pv(pmap, va, pv)
  	pmap_t pmap;
***************
*** 458,462 ****
      
  	if (!pmap_initialized)
! 		return(0);
  
  	/*
--- 464,468 ----
      
  	if (!pmap_initialized)
! 		return;
  
  	/*
***************
*** 495,500 ****
  		}
  	}
  	(void)splx(s);
- 	return(flags);
  }
  
--- 501,507 ----
  		}
  	}
+ 	if (flags & PT_W)
+ 		--pmap->pm_stats.wired_count;
  	(void)splx(s);
  }
  
***************
*** 513,517 ****
  	struct pv_entry *npv;
  	u_int s;
! 	u_int flags;
  
  	PDEBUG(5, printf("pmap_modify_pv(pmap=%p, va=%08lx, pv=%p, bic_mask=%08x, eor_mask=%08x)\n",
--- 520,524 ----
  	struct pv_entry *npv;
  	u_int s;
! 	u_int flags, oflags;
  
  	PDEBUG(5, printf("pmap_modify_pv(pmap=%p, va=%08lx, pv=%p, bic_mask=%08x, eor_mask=%08x)\n",
***************
*** 532,540 ****
  	for (npv = pv; npv; npv = npv->pv_next) {
  		if (pmap == npv->pv_pmap && va == npv->pv_va) {
! 			flags = npv->pv_flags;
! 			npv->pv_flags = ((flags & ~bic_mask) ^ eor_mask);
  			PDEBUG(0, printf("done flags=%08x\n", flags));
  			(void)splx(s);
! 			return(flags);
  		}
  	}
--- 539,554 ----
  	for (npv = pv; npv; npv = npv->pv_next) {
  		if (pmap == npv->pv_pmap && va == npv->pv_va) {
! 			oflags = npv->pv_flags;
! 			npv->pv_flags = flags =
! 			    ((oflags & ~bic_mask) ^ eor_mask);
! 			if ((flags ^ oflags) & PT_W) {
! 				if (flags & PT_W)
! 					++pmap->pm_stats.wired_count;
! 				else
! 					--pmap->pm_stats.wired_count;
! 			}
  			PDEBUG(0, printf("done flags=%08x\n", flags));
  			(void)splx(s);
! 			return (oflags);
  		}
  	}
***************
*** 972,976 ****
  		/* XXX should be done better than this */
  		pte = pmap_pte(pmap_kernel(), va);
! 		*pte = (*pte) & ~(PT_C | PT_B);
  
  		va += NBPG;
--- 986,990 ----
  		/* XXX should be done better than this */
  		pte = pmap_pte(pmap_kernel(), va);
! 		*pte = *pte & ~(PT_C | PT_B);
  
  		va += NBPG;
***************
*** 1089,1093 ****
  	/* XXX should be done better than this */
  	pte = pmap_pte(kernel_pmap, pmap->pm_vptpt);
! 	*pte = (*pte) & ~(PT_C | PT_B);
  
  	/* Wire in this page table */
--- 1103,1107 ----
  	/* XXX should be done better than this */
  	pte = pmap_pte(kernel_pmap, pmap->pm_vptpt);
! 	*pte = *pte & ~(PT_C | PT_B);
  
  	/* Wire in this page table */
***************
*** 1685,1695 ****
  				if ((bank = vm_physseg_find(atop(pa), &off))
  				    != -1) {
! 					int flags;
! 
! 					flags = pmap_remove_pv(pmap, sva,
  				    	    &vm_physmem[bank].pmseg.pvent[off]);
- 					if (flags & PT_W)
- 						pmap->pm_stats.wired_count--;
- 
  				}
  			}
--- 1699,1704 ----
  				if ((bank = vm_physseg_find(atop(pa), &off))
  				    != -1) {
! 					pmap_remove_pv(pmap, sva,
  				    	    &vm_physmem[bank].pmseg.pvent[off]);
  				}
  			}
***************
*** 1873,1877 ****
  		/* Clear write flag */
  		if ((bank = vm_physseg_find(atop(pa), &off)) != -1)
! 			pmap_modify_pv(pmap, sva,
  			    &vm_physmem[bank].pmseg.pvent[off], PT_Wr, 0);
  next:
--- 1882,1886 ----
  		/* Clear write flag */
  		if ((bank = vm_physseg_find(atop(pa), &off)) != -1)
! 			(void) pmap_modify_pv(pmap, sva,
  			    &vm_physmem[bank].pmseg.pvent[off], PT_Wr, 0);
  next:
***************
*** 1884,1888 ****
  }
  
! 
  int
  pmap_nightmare(pmap, pv, va, prot)
--- 1893,1902 ----
  }
  
! /*
!  * Since we have a virtually indexed cache, we may need to inhibit caching if
!  * there is more than one mapping and at least one of them is writable.
!  * Since we purge the cache on every context switch, we only need to check for
!  * other mappings within the same pmap.
!  */
  int
  pmap_nightmare(pmap, pv, va, prot)
***************
*** 1893,1969 ****
  {
  	struct pv_entry *npv;
  	int entries = 0;
  	int writeable = 0;
- 	int cacheable = 0;
- 
- 	/* We may need to inhibit the cache on all the mappings */
  
  	if (pv->pv_pmap == NULL)
! 		panic("pmap_enter: pv_entry has been lost\n");
! 
! 	/*
! 	 * There is at least one other VA mapping this page.
! 	 */
! 	for (npv = pv; npv; npv = npv->pv_next) {
! 		/* Count mappings in the same pmap */
! 		if (pmap == npv->pv_pmap) {
! 			++entries;
! 			/* Writeable mappings */
! 			if (npv->pv_flags & PT_Wr)
! 				++writeable;
! 		}
! 	}
! 	if (entries > 1) {
! /*		printf("pmap_nightmare: e=%d w=%d p=%d c=%d va=%lx [",
! 		    entries, writeable, (prot & VM_PROT_WRITE), cacheable, va);*/
! 		for (npv = pv; npv; npv = npv->pv_next) {
! 			/* Count mappings in the same pmap */
! 			if (pmap == npv->pv_pmap) {
! /*				printf("va=%lx ", npv->pv_va);*/
! 			}
! 		}
! /*		printf("]\n");*/
! #if 0
! 		if (writeable || (prot & VM_PROT_WRITE))) {
! 			for (npv = pv; npv; npv = npv->pv_next) {
! 				/* Revoke cacheability */
! 				if (pmap == npv->pv_pmap) {
! 					pte = pmap_pte(pmap, npv->pv_va);
! 					*pte = (*pte) & ~(PT_C | PT_B);
! 				}
! 			}
! 		}
  #endif
- 	} else {
- 		if ((prot & VM_PROT_WRITE) == 0)
- 			cacheable = PT_C;
- /*		if (cacheable == 0)
- 			printf("pmap_nightmare: w=%d p=%d va=%lx c=%d\n",
- 			    writeable, (prot & VM_PROT_WRITE), va, cacheable);*/
- 	}
- 	return(cacheable);
- }
  
! 
! int
! pmap_nightmare1(pmap, pv, va, prot, cacheable)
! 	pmap_t pmap;
! 	struct pv_entry *pv;
! 	vm_offset_t va;
! 	vm_prot_t prot;
! 	int cacheable;
! {
! 	struct pv_entry *npv;
! 	int entries = 0;
! 	int writeable = 0;
! 
! 	/* We may need to inhibit the cache on all the mappings */
! 
! 	if (pv->pv_pmap == NULL)
! 		panic("pmap_enter: pv_entry has been lost\n");
! 
! 	/*
! 	 * There is at least one other VA mapping this page.
! 	 */
  	for (npv = pv; npv; npv = npv->pv_next) {
  		/* Count mappings in the same pmap */
--- 1907,1920 ----
  {
  	struct pv_entry *npv;
+ 	pt_entry_t *pte;
  	int entries = 0;
  	int writeable = 0;
  
+ #ifdef DIAGNOSTIC
  	if (pv->pv_pmap == NULL)
! 		panic("pmap_enter: pv_entry has been lost");
  #endif
  
! 	/* Count mappings and writable mappings. */
  	for (npv = pv; npv; npv = npv->pv_next) {
  		/* Count mappings in the same pmap */
***************
*** 1975,2009 ****
  		}
  	}
! 	if (entries > 1) {
! /*		printf("pmap_nightmare1: e=%d w=%d p=%d c=%d va=%lx [",
! 		    entries, writeable, (prot & VM_PROT_WRITE), cacheable, va);*/
  		for (npv = pv; npv; npv = npv->pv_next) {
! 			/* Count mappings in the same pmap */
! 			if (pmap == npv->pv_pmap) {
! /*				printf("va=%lx ", npv->pv_va);*/
! 			}
! 		}
! /*		printf("]\n");*/
! #if 0
! 		if (writeable || (prot & VM_PROT_WRITE))) {
! 			for (npv = pv; npv; npv = npv->pv_next) {
! 				/* Revoke cacheability */
! 				if (pmap == npv->pv_pmap) {
! 					pte = pmap_pte(pmap, npv->pv_va);
! 					*pte = (*pte) & ~(PT_C | PT_B);
! 				}
  			}
  		}
! #endif
! 	} else {
! /*		cacheable = PT_C;*/
! /*		printf("pmap_nightmare1: w=%d p=%d va=%lx c=%d\n",
! 		    writeable, (prot & VM_PROT_WRITE), va, cacheable);*/
! 	}
! 
! 	return(cacheable);
  }
  
- 
  /*
   * void pmap_enter(pmap_t pmap, vm_offset_t va, vm_offset_t pa, vm_prot_t prot,
--- 1926,1943 ----
  		}
  	}
! 
! 	if (entries > 1 && writeable) {
! 		/* Revoke cacheability. */
  		for (npv = pv; npv; npv = npv->pv_next) {
! 			if (pmap == npv->pv_pmap && va != npv->pv_va) {
! 				pte = pmap_pte(pmap, npv->pv_va);
! 				*pte = *pte & ~(PT_C | PT_B);
  			}
  		}
! 		return (0);
! 	} else
! 		return (PT_C | PT_B);
  }
  
  /*
   * void pmap_enter(pmap_t pmap, vm_offset_t va, vm_offset_t pa, vm_prot_t prot,
***************
*** 2107,2110 ****
--- 2041,2052 ----
  	}
  
+ 	flags = 0;
+ 	if (prot & VM_PROT_WRITE)
+ 		flags |= PT_Wr;
+ 	if (va >= VM_MIN_ADDRESS && va < VM_MAXUSER_ADDRESS)
+ 		flags |= PT_Us;
+ 	if (wired)
+ 		flags |= PT_W;
+ 
  	/* More debugging info */
  	PDEBUG(5, printf("pmap_enter: pte for V%08lx = V%p (%08x)\n", va, pte,
***************
*** 2129,2141 ****
  			if ((bank = vm_physseg_find(atop(pa), &off)) != -1) {
  				pv = &vm_physmem[bank].pmseg.pvent[off];
! 				flags = pmap_modify_pv(pmap, va, pv, 0, 0) & PT_W;
! 				if (flags && !wired)
! 					--pmap->pm_stats.wired_count;
! 				else if (!flags && wired)
! 					++pmap->pm_stats.wired_count;
! 
!  				cacheable = pmap_nightmare1(pmap, pv, va, prot, cacheable);
   			} else
! 				cacheable = *pte & PT_C;
  		} else {
  			/* We are replacing the page with a new one. */
--- 2071,2079 ----
  			if ((bank = vm_physseg_find(atop(pa), &off)) != -1) {
  				pv = &vm_physmem[bank].pmseg.pvent[off];
! 				(void) pmap_modify_pv(pmap, va, pv,
! 				    PT_Wr | PT_Us | PT_W, flags);
!  				cacheable = pmap_nightmare(pmap, pv, va, prot);
   			} else
! 				cacheable = *pte & (PT_C | PT_B);
  		} else {
  			/* We are replacing the page with a new one. */
***************
*** 2151,2157 ****
  			if ((bank = vm_physseg_find(atop(opa), &off)) != -1) {
  				pv = &vm_physmem[bank].pmseg.pvent[off];
! 				flags = pmap_remove_pv(pmap, va, pv) & PT_W;
! 				if (flags)
! 					--pmap->pm_stats.wired_count;
  			}
  
--- 2089,2093 ----
  			if ((bank = vm_physseg_find(atop(opa), &off)) != -1) {
  				pv = &vm_physmem[bank].pmseg.pvent[off];
! 				pmap_remove_pv(pmap, va, pv);
  			}
  
***************
*** 2159,2170 ****
  		}
  	} else {
  		/* pte is not valid so we must be hooking in a new page */
  		++pmap->pm_stats.resident_count;
  
  	enter:
- 		/* Update the wiring stats for the new page */
- 		if (wired)
- 			++pmap->pm_stats.wired_count;
- 
  		/*
  		 * Enter on the PV list if part of our managed memory
--- 2095,2104 ----
  		}
  	} else {
+ 		opa = 0;
+ 
  		/* pte is not valid so we must be hooking in a new page */
  		++pmap->pm_stats.resident_count;
  
  	enter:
  		/*
  		 * Enter on the PV list if part of our managed memory
***************
*** 2172,2177 ****
  		if ((bank = vm_physseg_find(atop(pa), &off)) != -1) {
  			pv = &vm_physmem[bank].pmseg.pvent[off];
! 			if (pmap_enter_pv(pmap, va, pv, 0)) 
! 				cacheable = PT_C;
  			else
  				cacheable = pmap_nightmare(pmap, pv, va, prot);
--- 2106,2111 ----
  		if ((bank = vm_physseg_find(atop(pa), &off)) != -1) {
  			pv = &vm_physmem[bank].pmseg.pvent[off];
! 			if (pmap_enter_pv(pmap, va, pv, flags)) 
! 				cacheable = PT_C | PT_B;
  			else
  				cacheable = pmap_nightmare(pmap, pv, va, prot);
***************
*** 2181,2185 ****
  
  #ifdef MYCROFT_HACK
! 	printf("pmap_enter: pmap=%p va=%lx pa=%lx opa=%lx bank=%d off=%d pv=%p\n", pmap, va, pa, opa, bank, off, pv);
  #endif
  
--- 2115,2120 ----
  
  #ifdef MYCROFT_HACK
! 	if (mycroft_hack)
! 		printf("pmap_enter: pmap=%p va=%lx pa=%lx opa=%lx bank=%d off=%d pv=%p\n", pmap, va, pa, opa, bank, off, pv);
  #endif
  
***************
*** 2192,2196 ****
  #endif	/* DIAGNOSTIC */
  
! 	if (va >= VM_MIN_ADDRESS && va < VM_MAXUSER_ADDRESS)
  		npte |= PT_AP(AP_U);
  	if (va >= VM_MAXUSER_ADDRESS && va < VM_MAX_ADDRESS) {
--- 2127,2131 ----
  #endif	/* DIAGNOSTIC */
  
! 	if (flags & PT_Us)
  		npte |= PT_AP(AP_U);
  	if (va >= VM_MAXUSER_ADDRESS && va < VM_MAX_ADDRESS) {
***************
*** 2206,2210 ****
  		 * handling below.
  		 */
! 		if (!wired)
  			panic("pmap_enter: bogon bravo");
  		if (!pv)
--- 2141,2145 ----
  		 * handling below.
  		 */
! 		if (~flags & PT_W)
  			panic("pmap_enter: bogon bravo");
  		if (!pv)
***************
*** 2231,2235 ****
  		 */
  		ftype &= prot;
! 		npte |= PT_B | cacheable;
  		if (ftype & VM_PROT_WRITE) {
  			npte |= L2_SPAGE | PT_AP(AP_W);
--- 2166,2170 ----
  		 */
  		ftype &= prot;
! 		npte |= cacheable;
  		if (ftype & VM_PROT_WRITE) {
  			npte |= L2_SPAGE | PT_AP(AP_W);
***************
*** 2249,2254 ****
  	}
  
! #ifdef MYCROFT_HACK
! 	printf("pmap_enter: pmap=%p va=%lx pa=%lx prot=%x wired=%d ftype=%x npte=%08x\n", pmap, va, pa, prot, wired, ftype, npte);
  	//if (pmap_initialized)
  	//	console_debugger();
--- 2184,2190 ----
  	}
  
! #ifndef MYCROFT_HACK
! 	if (mycroft_hack)
! 		printf("pmap_enter: pmap=%p va=%lx pa=%lx prot=%x wired=%d ftype=%x npte=%08x\n", pmap, va, pa, prot, wired, ftype, npte);
  	//if (pmap_initialized)
  	//	console_debugger();
***************
*** 2260,2284 ****
  		panic("oopss: *pte = 0 in pmap_enter() npte=%08x\n", npte);
  
- 	if (bank != -1) {
- 		flags = 0;
- 		if (prot & VM_PROT_WRITE)
- 			flags |= PT_Wr;
- 		if (va >= VM_MIN_ADDRESS && va < VM_MAXUSER_ADDRESS)
- 			flags |= PT_Us;
- 		if (wired)
- 			flags |= PT_W;
- 		pmap_modify_pv(pmap, va, pv, PT_Wr | PT_Us | PT_W, flags);
- 	}
- 
- 	/*
- 	 * If we are mapping in a page to where the page tables are store
- 	 * then we must be mapping a page table. In this case we should
- 	 * also map the page table into the page directory
- 	 */
- 	if (va >= PROCESS_PAGE_TBLS_BASE && va < VM_MIN_KERNEL_ADDRESS)
- 		panic("pmap_enter: Mapping into page table area\n");
- 
  	/* Better flush the TLB ... */
- 
  	cpu_tlb_flushID_SE(va);
  
--- 2196,2200 ----
***************
*** 2333,2337 ****
  	vm_offset_t pa;
  	int bank, off;
- 	int current;
  	struct pv_entry *pv;
  
--- 2249,2252 ----
***************
*** 2354,2366 ****
  	pv = &vm_physmem[bank].pmseg.pvent[off];
  	/* Update the wired bit in the pv entry for this page. */
! 	current = pmap_modify_pv(pmap, va, pv, PT_W, wired ? PT_W : 0) & PT_W;
! 
! 	/* Update the statistics */
! 	if (wired & !current)
! 		pmap->pm_stats.wired_count++;
! 	else if (!wired && current)
! 		pmap->pm_stats.wired_count--;
  }
- 
  
  /*
--- 2269,2274 ----
  	pv = &vm_physmem[bank].pmseg.pvent[off];
  	/* Update the wired bit in the pv entry for this page. */
! 	(void) pmap_modify_pv(pmap, va, pv, PT_W, wired ? PT_W : 0);
  }
  
  /*
-----8<-----snip-----8<-----snip-----8<-----snip-----8<-----snip-----8<-----