
[PATCH v3] port/xen: map memory synchronously in privcmd PRIVCMD_MMAP*



Instead of mapping memory when a page fault happens in the memory
region, map the foreign memory directly at privcmd ioctl time (as Linux
does). This fixes a problem with upstream QEMU, which changes the
location of the gmfns after calling xc_map_foreign_bulk.
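
For illustration only (this is not part of the patch; the real code is
the privcmd_map() helper added below), the core of the synchronous
approach is a loop that enters every foreign frame into the process
pmap at ioctl time, wired so it cannot be paged out. The helper name
and the mfns parameter are hypothetical; the pmap_enter_ma() call
matches the one used in this patch:

	/*
	 * Condensed sketch of synchronous foreign mapping: enter each
	 * machine frame into the process pmap right away instead of
	 * waiting for a page fault on the region.
	 */
	static int
	map_foreign_now(pmap_t pmap, vaddr_t va0, const u_long *mfns,
	    int num, vm_prot_t prot, int domid)
	{
		int i, rc;

		for (i = 0; i < num; i++) {
			/* machine frame number -> machine address */
			paddr_t ma = (paddr_t)mfns[i] << PGSHIFT;

			/* PMAP_WIRED keeps the mapping from being paged out */
			rc = pmap_enter_ma(pmap, va0 + i * PAGE_SIZE, ma, 0,
			    prot, prot | PMAP_CANFAIL | PMAP_WIRED, domid);
			if (rc)
				return rc;
		}
		pmap_update(pmap);
		return 0;
	}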

Changes since v2:

 * Set the correct flags in pmap_enter_ma (PMAP_WIRED) to prevent the
   mappings from being swapped out.

 * Added a function (privcmd_getprot) to check for the right protection
   to use.

 * Changed the PRIVCMD_MMAP ioctl to use the same mechanism.

Changes since v1:

 * Zero allocated memory for mfn.

 * Check that the mfn memory was allocated, or fail.

 * Move the mfn mask to a define (MASK_INVALID; see the usage sketch
   after this list).
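
For reference, a hypothetical userspace caller of
IOCTL_PRIVCMD_MMAPBATCH might look like the sketch below (not part of
the patch). The privcmd_mmapbatch_t field names (num, dom, addr, arr)
are assumed from xen/xenio.h; frames the hypervisor refused come back
with MASK_INVALID (0xF0000000) ORed in, as in the batch path of this
patch:

	/*
	 * Hypothetical userspace caller for IOCTL_PRIVCMD_MMAPBATCH.
	 * The privcmd_mmapbatch_t layout is an assumption taken from
	 * <xen/xenio.h>; va must be a pre-reserved, page-aligned range.
	 */
	#include <sys/ioctl.h>
	#include <stdio.h>
	#include <xen/xenio.h>

	#define MASK_INVALID	0xF0000000	/* mirrors the kernel define */

	static int
	map_foreign_batch(int privcmd_fd, int domid, void *va,
	    unsigned long *mfns, int num)
	{
		privcmd_mmapbatch_t pmb;
		int i;

		pmb.num = num;
		pmb.dom = domid;
		pmb.addr = (unsigned long)va;
		pmb.arr = mfns;

		if (ioctl(privcmd_fd, IOCTL_PRIVCMD_MMAPBATCH, &pmb) < 0)
			return -1;

		/* failed frames come back with MASK_INVALID set */
		for (i = 0; i < num; i++)
			if (mfns[i] & MASK_INVALID)
				fprintf(stderr, "frame %d not mapped\n", i);
		return 0;
	}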

Cc: Cherry G. Mathew <cherry@zyx.in>
Signed-off-by: Roger Pau Monne <roger.pau@citrix.com>
---
 sys/arch/xen/xen/privcmd.c |  276 +++++++++++++-------------------------------
 1 files changed, 79 insertions(+), 197 deletions(-)

diff --git a/sys/arch/xen/xen/privcmd.c b/sys/arch/xen/xen/privcmd.c
index f584913..a4415d0 100644
--- a/sys/arch/xen/xen/privcmd.c
+++ b/sys/arch/xen/xen/privcmd.c
@@ -48,6 +48,7 @@ __KERNEL_RCSID(0, "$NetBSD: privcmd.c,v 1.43 2011/06/15 19:51:50 rmind Exp $");
 #include <xen/xenio.h>
 
 #define        PRIVCMD_MODE    (S_IRUSR)
+#define        MASK_INVALID    0xF0000000      /* marks gmfn entries that failed to map */
 
 /* Magic value is used to mark invalid pages.
  * This must be a value within the page-offset.
@@ -55,20 +56,10 @@ __KERNEL_RCSID(0, "$NetBSD: privcmd.c,v 1.43 2011/06/15 19:51:50 rmind Exp $");
  */ 
 #define INVALID_PAGE   0xfff
 
-struct privcmd_object {
-       struct uvm_object uobj;
-       paddr_t *maddr; /* array of machine address to map */
-       int     npages;
-       int     domid;
-};
-
-int privcmd_nobjects = 0;
-
-static void privpgop_reference(struct uvm_object *);
-static void privpgop_detach(struct uvm_object *);
-static int privpgop_fault(struct uvm_faultinfo *, vaddr_t , struct vm_page **,
-                        int, int, vm_prot_t, int);
-static int privcmd_map_obj(struct vm_map *, vaddr_t, paddr_t *, int, int);
+static int privcmd_map(pmap_t pmap, vaddr_t va0, u_long *gmfn,
+               vm_prot_t prot, int num, int domid, int allow_failure);
+static int privcmd_getprot(struct vm_map *map, vaddr_t start, off_t size,
+               vm_prot_t *prot);
 
 
 static int
@@ -263,7 +254,6 @@ privcmd_ioctl(void *v)
                kauth_cred_t a_cred;
        } */ *ap = v;
        int error = 0;
-       paddr_t *maddr;
 
        switch (ap->a_command) {
        case IOCTL_PRIVCMD_HYPERCALL:
@@ -335,9 +325,12 @@ privcmd_ioctl(void *v)
                int i, j;
                privcmd_mmap_t *mcmd = ap->a_data;
                privcmd_mmap_entry_t mentry;
-               vaddr_t va;
-               paddr_t ma;
+               u_long *gmfn, gmfn0;
+               vaddr_t va0;
+               off_t size;
+               vm_prot_t prot;
                struct vm_map *vmm = &curlwp->l_proc->p_vmspace->vm_map;
+               pmap_t pmap = vm_map_pmap(vmm);
 
                for (i = 0; i < mcmd->num; i++) {
                        error = copyin(&mcmd->entry[i], &mentry, sizeof(mentry));
@@ -347,40 +340,44 @@ privcmd_ioctl(void *v)
                                return EINVAL;
                        if (mentry.va > VM_MAXUSER_ADDRESS)
                                return EINVAL;
-#if 0
-                       if (mentry.va + (mentry.npages << PGSHIFT) >
-                           mrentry->vm_end)
+
+                       va0 = mentry.va & ~PAGE_MASK;
+                       size = ((off_t)mentry.npages << PGSHIFT);
+
+                       /* get protection */
+                       if (privcmd_getprot(vmm, va0, size, &prot))
                                return EINVAL;
-#endif
-                       maddr = kmem_alloc(sizeof(paddr_t) * mentry.npages,
-                           KM_SLEEP);
-                       if (maddr == NULL)
+
+                       /* fetch list of gmfn entries to map */
+                       gmfn = kmem_zalloc(sizeof(u_long) * mentry.npages, KM_SLEEP);
+                       if (gmfn == NULL)
                                return ENOMEM;
-                       va = mentry.va & ~PAGE_MASK;
-                       ma = ((paddr_t)mentry.mfn) <<  PGSHIFT; /* XXX ??? */
+                       gmfn0 = mentry.mfn;
                        for (j = 0; j < mentry.npages; j++) {
-                               maddr[j] = ma;
-                               ma += PAGE_SIZE;
+                               gmfn[j] = gmfn0;
+                               gmfn0++;
                        }
-                       error  = privcmd_map_obj(vmm, va, maddr,
-                           mentry.npages, mcmd->dom);
-                       if (error)
+
+                       /* Map the memory region directly */
+                       error = privcmd_map(pmap, va0, gmfn, prot, mentry.npages,
+                                           mcmd->dom, 0);
+                       if (error) {
+                               kmem_free(gmfn, sizeof(u_long) * mentry.npages);
                                return error;
+                       }
+                       kmem_free(gmfn, sizeof(u_long) * mentry.npages);
                }
                break;
        }
        case IOCTL_PRIVCMD_MMAPBATCH:
        {
-               int i;
                privcmd_mmapbatch_t* pmb = ap->a_data;
-               vaddr_t va0, va;
-               u_long mfn;
-               paddr_t ma;
+               vaddr_t va0;
+               u_long *gmfn;
                struct vm_map *vmm;
-               struct vm_map_entry *entry;
                vm_prot_t prot;
                pmap_t pmap;
-               vaddr_t trymap;
+               off_t size;
 
                vmm = &curlwp->l_proc->p_vmspace->vm_map;
                pmap = vm_map_pmap(vmm);
@@ -393,55 +390,32 @@ privcmd_ioctl(void *v)
                if (((VM_MAXUSER_ADDRESS - va0) >> PGSHIFT) < pmb->num)
                        return EINVAL;
 
-               vm_map_lock_read(vmm);
-               if (!uvm_map_lookup_entry(vmm, va0, &entry)) {
-                       vm_map_unlock_read(vmm);
+               /* get protection */
+               size = ((off_t)pmb->num) << PGSHIFT;
+               if (privcmd_getprot(vmm, va0, size, &prot))
                        return EINVAL;
-               }
-               prot = entry->protection;
-               vm_map_unlock_read(vmm);
-               
-               maddr = kmem_alloc(sizeof(paddr_t) * pmb->num, KM_SLEEP);
-               if (maddr == NULL)
-                       return ENOMEM;
-               /* get a page of KVA to check mappins */
-               trymap = uvm_km_alloc(kernel_map, PAGE_SIZE, PAGE_SIZE,
-                   UVM_KMF_VAONLY);
-               if (trymap == 0) {
-                       kmem_free(maddr, sizeof(paddr_t) * pmb->num);
-                       return ENOMEM;
-               }
 
-               for(i = 0; i < pmb->num; ++i) {
-                       va = va0 + (i * PAGE_SIZE);
-                       error = copyin(&pmb->arr[i], &mfn, sizeof(mfn));
-                       if (error != 0) {
-                               /* XXX: mappings */
-                               pmap_update(pmap_kernel());
-                               kmem_free(maddr, sizeof(paddr_t) * pmb->num);
-                               uvm_km_free(kernel_map, trymap, PAGE_SIZE,
-                                   UVM_KMF_VAONLY);
-                               return error;
-                       }
-                       ma = ((paddr_t)mfn) << PGSHIFT;
-                       if (pmap_enter_ma(pmap_kernel(), trymap, ma, 0,
-                           prot, PMAP_CANFAIL, pmb->dom)) {
-                               mfn |= 0xF0000000;
-                               copyout(&mfn, &pmb->arr[i], sizeof(mfn));
-                               maddr[i] = INVALID_PAGE;
-                       } else {
-                               pmap_remove(pmap_kernel(), trymap,
-                                   trymap + PAGE_SIZE);
-                               maddr[i] = ma;
-                       }
+               /* fetch list of gmfn entries to map */
+               gmfn = kmem_zalloc(sizeof(u_long) * pmb->num, KM_SLEEP);
+               if (gmfn == NULL)
+                       return ENOMEM;
+               error = copyin(pmb->arr, gmfn, sizeof(u_long) * pmb->num);
+               if (error != 0) {
+                       /* XXX: mappings */
+                       kmem_free(gmfn, sizeof(u_long) * pmb->num);
+                       return error;
                }
-               pmap_update(pmap_kernel());
-
-               error = privcmd_map_obj(vmm, va0, maddr, pmb->num, pmb->dom);
-               uvm_km_free(kernel_map, trymap, PAGE_SIZE, UVM_KMF_VAONLY);
 
-               if (error != 0)
+               /* map the memory region directly */
+               error = privcmd_map(pmap, va0, gmfn, prot, pmb->num, pmb->dom, 1);
+               if (error) {
+                       kmem_free(gmfn, sizeof(u_long) * pmb->num);
                        return error;
+               }
+               pmap_update(pmap);
+               /* copy the result of the mapping to the user space */
+               copyout(gmfn, pmb->arr, sizeof(u_long) * pmb->num);
+               kmem_free(gmfn, sizeof(u_long) * pmb->num);
 
                break;
        }
@@ -452,140 +426,48 @@ privcmd_ioctl(void *v)
        return error;
 }
 
-static struct uvm_pagerops privpgops = {
-  .pgo_reference = privpgop_reference,
-  .pgo_detach = privpgop_detach,
-  .pgo_fault = privpgop_fault,
-};
-
-static void
-privpgop_reference(struct uvm_object *uobj)
-{
-       mutex_enter(uobj->vmobjlock);
-       uobj->uo_refs++;
-       mutex_exit(uobj->vmobjlock);
-}
-
-static void
-privpgop_detach(struct uvm_object *uobj)
+static int privcmd_map(pmap_t pmap, vaddr_t va0, u_long *gmfn,
+               vm_prot_t prot, int num, int domid, int allow_failure)
 {
-       struct privcmd_object *pobj = (struct privcmd_object *)uobj;
-
-       mutex_enter(uobj->vmobjlock);
-       if (uobj->uo_refs > 1) {
-               uobj->uo_refs--;
-               mutex_exit(uobj->vmobjlock);
-               return;
-       }
-       mutex_exit(uobj->vmobjlock);
-       kmem_free(pobj->maddr, sizeof(paddr_t) * pobj->npages);
-       uvm_obj_destroy(uobj, true);
-       kmem_free(pobj, sizeof(struct privcmd_object));
-       privcmd_nobjects--;
-}
-
-static int
-privpgop_fault(struct uvm_faultinfo *ufi, vaddr_t vaddr, struct vm_page **pps,
-    int npages, int centeridx, vm_prot_t access_type, int flags)
-{
-       struct vm_map_entry *entry = ufi->entry;
-       struct uvm_object *uobj = entry->object.uvm_obj;
-       struct privcmd_object *pobj = (struct privcmd_object*)uobj;
-       int maddr_i, i, error = 0;
-
-       /* compute offset from start of map */
-       maddr_i = (entry->offset + (vaddr - entry->start)) >> PAGE_SHIFT;
-       if (maddr_i + npages > pobj->npages) {
-               return EINVAL;
-       }
-       for (i = 0; i < npages; i++, maddr_i++, vaddr+= PAGE_SIZE) {
-               if ((flags & PGO_ALLPAGES) == 0 && i != centeridx)
-                       continue;
-               if (pps[i] == PGO_DONTCARE)
+       int i;
+       vaddr_t va;
+       paddr_t gfn;
+       int rc;
+
+       for (i = 0; i < num; ++i) {
+               va = va0 + (i * PAGE_SIZE);
+               gfn = ((paddr_t)gmfn[i]) << PGSHIFT;
+               rc = pmap_enter_ma(pmap, va, gfn, 0, prot,
+                                  prot | PMAP_CANFAIL | PMAP_WIRED, domid);
+               if (rc && allow_failure) {
+                       gmfn[i] |= MASK_INVALID;
                        continue;
-               if (pobj->maddr[maddr_i] == INVALID_PAGE) {
-                       /* This has already been flagged as error. */
-                       error = EFAULT;
-                       break;
                }
-               error = pmap_enter_ma(ufi->orig_map->pmap, vaddr,
-                   pobj->maddr[maddr_i], 0, ufi->entry->protection,
-                   PMAP_CANFAIL | ufi->entry->protection,
-                   pobj->domid);
-               if (error == ENOMEM) {
-                       error = ERESTART;
-                       break;
-               }
-               if (error) {
-                       /* XXX for proper ptp accountings */
-                       pmap_remove(ufi->orig_map->pmap, vaddr, 
-                           vaddr + PAGE_SIZE);
+               if (rc && !allow_failure) {
+                       return rc;
                }
        }
-       pmap_update(ufi->orig_map->pmap);
-       uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, uobj);
 
-       if (error == ERESTART) {
-               uvm_wait("privpgop_fault");
-       }
-       return error;
+       return 0;
 }
 
-static int
-privcmd_map_obj(struct vm_map *map, vaddr_t start, paddr_t *maddr,
-               int npages, int domid)
+static int privcmd_getprot(struct vm_map *map, vaddr_t start, off_t size,
+               vm_prot_t *prot)
 {
-       struct privcmd_object *obj;
-       int error;
-       uvm_flag_t uvmflag;
-       vaddr_t newstart = start;
-       vm_prot_t prot;
-       off_t size = ((off_t)npages << PGSHIFT);
-
        vm_map_lock_read(map);
-       /* get protections. This also check for validity of mapping */
+       /* get protections. This also checks for the validity of the mapping */
        if (uvm_map_checkprot(map, start, start + size - 1, VM_PROT_WRITE))
-               prot = VM_PROT_READ | VM_PROT_WRITE;
+               *prot = VM_PROT_READ | VM_PROT_WRITE;
        else if (uvm_map_checkprot(map, start, start + size - 1, VM_PROT_READ))
-               prot = VM_PROT_READ;
+               *prot = VM_PROT_READ;
        else {
-               printf("uvm_map_checkprot 0x%lx -> 0x%lx "
-                   "failed\n",
-                   start, (unsigned long)(start + size - 1));
+               printf("uvm_map_checkprot 0x%lx -> 0x%lx failed\n",
+                      start, (unsigned long)(start + size - 1));
                vm_map_unlock_read(map);
-               kmem_free(maddr, sizeof(paddr_t) * npages);
                return EINVAL;
        }
        vm_map_unlock_read(map);
-       /* remove current entries */
-       uvm_unmap1(map, start, start + size, 0);
 
-       obj = kmem_alloc(sizeof(struct privcmd_object), KM_SLEEP);
-       if (obj == NULL) {
-               kmem_free(maddr, sizeof(paddr_t) * npages);
-               return ENOMEM;
-       }
-
-       privcmd_nobjects++;
-       uvm_obj_init(&obj->uobj, &privpgops, true, 1);
-       mutex_enter(obj->uobj.vmobjlock);
-       obj->maddr = maddr;
-       obj->npages = npages;
-       obj->domid = domid;
-       mutex_exit(obj->uobj.vmobjlock);
-       uvmflag = UVM_MAPFLAG(prot, prot, UVM_INH_NONE, UVM_ADV_NORMAL,
-           UVM_FLAG_FIXED | UVM_FLAG_NOMERGE);
-       error = uvm_map(map, &newstart, size, &obj->uobj, 0, 0, uvmflag);
-
-       if (error) {
-               if (obj)
-                       obj->uobj.pgops->pgo_detach(&obj->uobj);
-               return error;
-       }
-       if (newstart != start) {
-               printf("uvm_map didn't give us back our vm space\n");
-               return EINVAL;
-       }
        return 0;
 }
 
-- 
1.7.7.5 (Apple Git-26)


