Port-xen archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Re: [PATCH v2] port/xen: map memory directly in privcmd PRIVCMD_MMAPBATCH



Cherry G.Mathew wrote:
Hi Roger,



     Roger>  Instead of mapping memory when a pagefault happens in the
     Roger>  memory region, map the memory directly on the privcmd call (as
     Roger>  Linux does).  This fixes the problem with Qemu upstream, that
     Roger>  changes the location of the pfns after calling
     Roger>  xc_map_foreign_bulk.

I'm not sure why exactly this change is needed, could you explain it
again? Why is it not ok for the backing mfns to be lazily mapped in?

I've tried explaining the problem here:

http://mail-index.netbsd.org/port-xen/2012/06/15/msg007293.html

Hopefully it's a good starting point.

I see that you use pmap_enter() directly to map the MAs. This means that
uvm(9) doesn't know about the mapping, and if the qemu process were to
be swapped out of memory, the mapping would not be restored on a
subsequent swap in. Is this okay ?

Linux maps those pages as VM_RESERVED, which prevents them from being swapped out; should we do the same (PMAP_WIRED)?

I think this change needs a bit more review, I'm Cc:ing the port
maintainer (bouyer@).

Cheers,

Cherry

------------------------------------------------------
Changes since v1:

  * Zero allocated memory for mfn.

  * Check mfn memory is allocated or fail.

  * Move the mfn mask to a define.

Cc: Cherry G.Mathew <cherry%zyx.in@localhost>
Signed-off-by: Roger Pau Monne <roger.pau%citrix.com@localhost>
---
  sys/arch/xen/xen/privcmd.c |   81 ++++++++++++++++++++-----------------------
  1 files changed, 38 insertions(+), 43 deletions(-)

diff --git a/sys/arch/xen/xen/privcmd.c b/sys/arch/xen/xen/privcmd.c
index f584913..7bdcebb 100644
--- a/sys/arch/xen/xen/privcmd.c
+++ b/sys/arch/xen/xen/privcmd.c
@@ -48,6 +48,7 @@ __KERNEL_RCSID(0, "$NetBSD: privcmd.c,v 1.43 2011/06/15 19:51:50 
rmind Exp $");
  #include <xen/xenio.h>

  #define       PRIVCMD_MODE    (S_IRUSR)
+#define        MASK_INVALID    0xF0000000

  /* Magic value is used to mark invalid pages.
   * This must be a value within the page-offset.
@@ -69,6 +70,8 @@ static void privpgop_detach(struct uvm_object *);
  static int privpgop_fault(struct uvm_faultinfo *, vaddr_t , struct vm_page **,
                         int, int, vm_prot_t, int);
  static int privcmd_map_obj(struct vm_map *, vaddr_t, paddr_t *, int, int);
+static int privcmd_map(pmap_t pmap, vaddr_t va0, u_long *mfn,
+                       vm_prot_t prot, int num, int domid);


  static int
@@ -371,16 +374,13 @@ privcmd_ioctl(void *v)
        }
        case IOCTL_PRIVCMD_MMAPBATCH:
        {
-               int i;
                privcmd_mmapbatch_t* pmb = ap->a_data;
-               vaddr_t va0, va;
-               u_long mfn;
-               paddr_t ma;
+               vaddr_t va0;
+               u_long *mfn;
                struct vm_map *vmm;
                struct vm_map_entry *entry;
                vm_prot_t prot;
                pmap_t pmap;
-               vaddr_t trymap;

                vmm = &curlwp->l_proc->p_vmspace->vm_map;
                pmap = vm_map_pmap(vmm);
@@ -400,48 +400,25 @@ privcmd_ioctl(void *v)
                }
                prot = entry->protection;
                vm_map_unlock_read(vmm);
-               
-               maddr = kmem_alloc(sizeof(paddr_t) * pmb->num, KM_SLEEP);
-               if (maddr == NULL)
-                       return ENOMEM;
-               /* get a page of KVA to check mappins */
-               trymap = uvm_km_alloc(kernel_map, PAGE_SIZE, PAGE_SIZE,
-                   UVM_KMF_VAONLY);
-               if (trymap == 0) {
-                       kmem_free(maddr, sizeof(paddr_t) * pmb->num);
-                       return ENOMEM;
-               }

-               for(i = 0; i<  pmb->num; ++i) {
-                       va = va0 + (i * PAGE_SIZE);
-                       error = copyin(&pmb->arr[i],&mfn, sizeof(mfn));
-                       if (error != 0) {
-                               /* XXX: mappings */
-                               pmap_update(pmap_kernel());
-                               kmem_free(maddr, sizeof(paddr_t) * pmb->num);
-                               uvm_km_free(kernel_map, trymap, PAGE_SIZE,
-                                   UVM_KMF_VAONLY);
-                               return error;
-                       }
-                       ma = ((paddr_t)mfn)<<  PGSHIFT;
-                       if (pmap_enter_ma(pmap_kernel(), trymap, ma, 0,
-                           prot, PMAP_CANFAIL, pmb->dom)) {
-                               mfn |= 0xF0000000;
-                               copyout(&mfn,&pmb->arr[i], sizeof(mfn));
-                               maddr[i] = INVALID_PAGE;
-                       } else {
-                               pmap_remove(pmap_kernel(), trymap,
-                                   trymap + PAGE_SIZE);
-                               maddr[i] = ma;
-                       }
+               mfn = kmem_zalloc(sizeof(u_long) * pmb->num, KM_SLEEP);
+               if (mfn == NULL)
+                       return ENOMEM;
+               error = copyin(pmb->arr, mfn, sizeof(u_long) * pmb->num);
+               if (error != 0) {
+                       /* XXX: mappings */
+                       kmem_free(mfn, sizeof(u_long) * pmb->num);
+                       return error;
                }
-               pmap_update(pmap_kernel());

-               error = privcmd_map_obj(vmm, va0, maddr, pmb->num, pmb->dom);
-               uvm_km_free(kernel_map, trymap, PAGE_SIZE, UVM_KMF_VAONLY);
-
-               if (error != 0)
+               /* Map the memory region directly */
+               error = privcmd_map(pmap, va0, mfn, prot, pmb->num, pmb->dom);
+               if (error) {
+                       kmem_free(mfn, sizeof(u_long) * pmb->num);
                        return error;
+               }
+               copyout(mfn, pmb->arr, sizeof(u_long) * pmb->num);
+               kmem_free(mfn, sizeof(u_long) * pmb->num);

                break;
        }
@@ -452,6 +429,24 @@ privcmd_ioctl(void *v)
        return error;
  }

+static int privcmd_map(pmap_t pmap, vaddr_t va0, u_long *mfn,
+               vm_prot_t prot, int num, int domid)
+{
+       int i;
+       vaddr_t va;
+       paddr_t ma;
+
+       for (i = 0; i < num; ++i) {
+               va = va0 + (i * PAGE_SIZE);
+               ma = ((paddr_t)mfn[i]) << PGSHIFT;
+               if (pmap_enter_ma(pmap, va, ma, 0, prot, PMAP_CANFAIL, domid))

I think this should be pmap_enter_ma(pmap, va, ma, 0, prot, prot | PMAP_CANFAIL | PMAP_WIRED, domid)

+                       mfn[i] |= MASK_INVALID;
+       }
+       pmap_update(pmap);
+
+       return 0;
+}
+
  static struct uvm_pagerops privpgops = {
    .pgo_reference = privpgop_reference,
    .pgo_detach = privpgop_detach,



Home | Main Index | Thread Index | Old Index