Port-xen archive
[dump-core] "No VCPU context could be grabbed" (was: Re: memory_op hypercall failure: XENMEM_maximum_gpfn)
Jean-Yves Migeon wrote:
So, here is what the code looks like after some tryouts; see the diff.
Just for information: this is not meant for a commit. Its current place
(hypervisor_attach) is totally unsuitable, but since I am using memory
allocators, I put it late in the boot-up process.
With it applied, the error with xm dump-core seems to disappear, but now
I am getting a VCPU error:
current# xm dump-core 1 /root/core
Dumping core of domain: 1 ...
Error: Failed to dump core: (1, 'Internal error', 'No VCPU context
could be grabbed (14 = Bad address)')
Usage: xm dump-core [-L|--live] [-C|--crash] <Domain> [Filename]
...
I do not presently understand what such an error means (failed to
access vcpu_info[], huh?); I will investigate a bit. Debugging xend to
see what raises this error ends in a panic() (see kern/36183).
The error is erratic: about two times out of three the dump-core
operation ends successfully, and the remaining third ends with a
"context couldn't be grabbed" error.
It looks like the lock_pages() code in
sysutils/xentools/work/xen-3.1.4/tools/libxc/xc_private.c is at fault
here (see patch-ad):
int lock_pages(void *addr, size_t len)
{
int e = 0;
+ void *laddr = (void *)((u_long)addr & ~0xfffUL);
+ size_t llen = (len + 0xfffUL) & ~0xfffUL;
#ifndef __sun__
- e = mlock(addr, len);
+ e = mlock(laddr, llen);
#endif
return (e);
}
(and its counterpart unlock_pages() ).
As I said in a previous mail, the roundings do not exactly match the
ones NetBSD uses (this one rounds down, while NetBSD rounds up for
mlock(2)).
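To make that concrete, here is what the patch-ad arithmetic computes for
a made-up range that straddles a page boundary. Whether this is exactly
what bites here I am not sure, but note that the page offset of addr is
dropped and never added back to len, so the tail of such a buffer ends
up outside the locked range (values below are arbitrary, 4 KiB pages
assumed):

/*
 * Illustration only: the patch-ad rounding applied to an arbitrary,
 * non page-aligned buffer that crosses a page boundary.
 */
#include <stdio.h>

int
main(void)
{
	unsigned long addr = 0x1ff0UL;	/* not page aligned */
	unsigned long len  = 0x20UL;	/* runs into the next page */

	unsigned long laddr = addr & ~0xfffUL;			/* 0x1000 */
	unsigned long llen  = (len + 0xfffUL) & ~0xfffUL;	/* 0x1000 */

	/* locked: [0x1000, 0x2000), requested: [0x1ff0, 0x2010) */
	printf("lock [%#lx, %#lx) for request [%#lx, %#lx)\n",
	    laddr, laddr + llen, addr, addr + len);
	return 0;
}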
Removing them from patch-ad for lock_pages and unlock_pages makes the xm
dump-core operation work fine (well, it didn't crash after a whole 20
minute loop, whereas before I didn't have to wait more than 10 s for it
to crash):
int lock_pages(void *addr, size_t len)
{
int e = 0;
#ifndef __sun__
e = mlock(addr, len);
#endif
return (e);
}
void unlock_pages(void *addr, size_t len)
{
#ifndef __sun__
safe_munlock(addr, len);
#endif
}
So why does this matter? Well, xm dump-core shares operations with
save/restore, so if xm dump-core does not work properly, save/restore
won't either.
Small remark: the core generated by dump-core does not have the same
content as one generated by savecore(8), so you cannot use it for
debugging like a traditional core file with gdb. To use it, we need a
gdbserver version patched for Xen dumps (look for it in the xentools3
pkg). However, I did not manage to check the correctness of the dump, as
gdbserver-xen looks Linux-centric and does not (currently) work with
NetBSD. It might work after porting some other stuff like crash(8), but
that is a whole other story.
Please find enclosed patch-ad and diff (for arch.p2m_table).
Questions remaining:
- is there any possibility other than using memory allocators for the
diff? The xpmap bootstrap happens early during boot-up, and I don't
think using malloc/kmem_alloc before MM init is possible (I know that
putting that stuff in hypervisor.c is the ugliest thing I could have
done, but it is just a try :o )
- regarding debugging of a domU (out of curiosity): what are you using?
Serial lines with options KGDB?
Thanks for your attention :)
Cheers,
--
Jean-Yves Migeon
jean-yves.migeon%espci.fr@localhost
Index: hypervisor.c
===================================================================
RCS file: /cvsroot/src/sys/arch/xen/xen/hypervisor.c,v
retrieving revision 1.36
diff -u -r1.36 hypervisor.c
--- hypervisor.c 16 Apr 2008 18:41:48 -0000 1.36
+++ hypervisor.c 25 May 2008 17:35:25 -0000
@@ -382,6 +382,56 @@
ctrl_if_register_receiver(CMSG_SHUTDOWN,
hypervisor_shutdown_handler, CALLBACK_IN_BLOCKING_CONTEXT);
#endif
+
+#ifdef XEN3
+
+#define vtomfn(va) (vtomach(va) >> PAGE_SHIFT)
+
+ /*
+ * pfn_to_mfn_frame_list_list initialization
+ * required by Xen tools for dump-core/save/restore
+ * These lists consist of 3 layers of page frames, each level
+ * referencing its lower ones through their mfn, and providing
+ * a physical to machine mapping
+ */
+ int i, j;
+ int fpp;
+ unsigned long cur_pfn, max_pfn;
+ unsigned long * l3_p2m_page;
+ unsigned long * l2_p2m_page;
+
+ max_pfn = xen_start_info.nr_pages;
+ /* number of frames referenced in a page */
+ fpp = PAGE_SIZE / sizeof(unsigned long);
+ l3_p2m_page = kmem_alloc(PAGE_SIZE, KM_NOSLEEP);
+ if (l3_p2m_page == NULL)
+ panic("Could not allocate memory for l3_p2m_page");
+
+ for (i = 0; i < fpp; i++) {
+ l2_p2m_page = kmem_alloc(PAGE_SIZE, KM_NOSLEEP);
+ if (l2_p2m_page == NULL)
+ panic("Could not allocate memory for l2_p2m_page");
+ l3_p2m_page[i] = vtomfn((vaddr_t)l2_p2m_page);
+
+ for (j = 0; j < fpp; j++) {
+ /*
+ * index of the pseudo L1 page we are referencing
+ * in L2 page
+ */
+ cur_pfn = (i + j) * fpp;
+ if (cur_pfn >= max_pfn)
+ goto exit_p2m;
+ l2_p2m_page[j] =
+     vtomfn((vaddr_t)&xpmap_phys_to_machine_mapping[cur_pfn]);
+ }
+ }
+
+exit_p2m:
+ HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
+     vtomfn((vaddr_t)l3_p2m_page);
+ HYPERVISOR_shared_info->arch.max_pfn = max_pfn;
+
+#endif
}
static int
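To make the layering built above a bit more obvious, here is how I
picture the tools walking the three levels to find the p2m frame
covering a given pfn when they consume pfn_to_mfn_frame_list_list. This
is only a sketch of the structure, not code from libxc; read_frame() is
a made-up helper that would map a machine frame and return a pointer to
its contents:

/* Hypothetical helper: map the machine frame 'mfn', return its contents. */
extern unsigned long *read_frame(unsigned long mfn);

/*
 * Sketch only: given the mfn stored in arch.pfn_to_mfn_frame_list_list
 * and a pfn, return the mfn of the p2m frame whose entries cover that
 * pfn. fpp is the number of frame references per page, as in the diff.
 */
static unsigned long
p2m_frame_for_pfn(unsigned long l3_mfn, unsigned long pfn)
{
	unsigned long fpp = PAGE_SIZE / sizeof(unsigned long);
	unsigned long frame = pfn / fpp;		/* index of the p2m frame */
	unsigned long *l3 = read_frame(l3_mfn);		/* mfns of the L2 pages */
	unsigned long *l2 = read_frame(l3[frame / fpp]);
	return l2[frame % fpp];				/* mfn of the p2m frame */
}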
$NetBSD: patch-ad,v 1.1.1.1 2007/06/14 19:39:46 bouyer Exp $
--- libxc/xc_private.c.orig 2007-05-18 16:45:21.000000000 +0200
+++ libxc/xc_private.c 2007-05-27 13:43:06.000000000 +0200
@@ -10,7 +10,12 @@
#include <stdarg.h>
#include <pthread.h>
-static __thread xc_error last_error = { XC_ERROR_NONE, ""};
+static pthread_key_t last_error_pkey;
+static pthread_once_t last_error_pkey_once = PTHREAD_ONCE_INIT;
+
+static pthread_key_t errbuf_pkey;
+static pthread_once_t errbuf_pkey_once = PTHREAD_ONCE_INIT;
+
#if DEBUG
static xc_error_handler error_handler = xc_default_error_handler;
#else
@@ -23,15 +28,44 @@
fprintf(stderr, "ERROR %s: %s\n", desc, err->message);
}
+static void
+_xc_clean_last_error(void *m)
+{
+ if (m)
+ free(m);
+ pthread_setspecific(last_error_pkey, NULL);
+}
+
+static void
+_xc_init_last_error(void)
+{
+ pthread_key_create(&last_error_pkey, _xc_clean_last_error);
+}
+static xc_error *
+_xc_get_last_error(void) {
+ xc_error *last_error;
+
+ pthread_once(&last_error_pkey_once, _xc_init_last_error);
+
+ last_error = pthread_getspecific(last_error_pkey);
+ if (last_error == NULL) {
+ last_error = malloc(sizeof(xc_error));
+ pthread_setspecific(last_error_pkey, last_error);
+ }
+ return last_error;
+}
+
+
const xc_error *xc_get_last_error(void)
{
- return &last_error;
+ return _xc_get_last_error();
}
void xc_clear_last_error(void)
{
- last_error.code = XC_ERROR_NONE;
- last_error.message[0] = '\0';
+ xc_error *last_error = _xc_get_last_error();
+ last_error->code = XC_ERROR_NONE;
+ last_error->message[0] = '\0';
}
const char *xc_error_code_to_desc(int code)
@@ -64,9 +98,10 @@
static void _xc_set_error(int code, const char *msg)
{
- last_error.code = code;
- strncpy(last_error.message, msg, XC_MAX_ERROR_MSG_LEN - 1);
- last_error.message[XC_MAX_ERROR_MSG_LEN-1] = '\0';
+ xc_error *last_error = _xc_get_last_error();
+ last_error->code = code;
+ strncpy(last_error->message, msg, XC_MAX_ERROR_MSG_LEN - 1);
+ last_error->message[XC_MAX_ERROR_MSG_LEN-1] = '\0';
}
void xc_set_error(int code, const char *fmt, ...)
@@ -84,23 +119,29 @@
errno = saved_errno;
- if ( error_handler != NULL )
- error_handler(&last_error);
+ if ( error_handler != NULL ) {
+ xc_error *last_error = _xc_get_last_error();
+ error_handler(last_error);
+ }
}
int lock_pages(void *addr, size_t len)
{
int e = 0;
+ void *laddr = (void *)((u_long)addr & ~0xfffUL);
+ size_t llen = (len + 0xfffUL) & ~0xfffUL;
#ifndef __sun__
- e = mlock(addr, len);
+ e = mlock(laddr, llen);
#endif
return (e);
}
void unlock_pages(void *addr, size_t len)
{
+ void *laddr = (void *)((u_long)addr & ~0xfffUL);
+ size_t llen = (len + 0xfffUL) & ~0xfffUL;
#ifndef __sun__
- safe_munlock(addr, len);
+ safe_munlock(laddr, llen);
#endif
}
@@ -466,20 +507,43 @@
return new_mfn;
}
+static void
+_xc_clean_errbuf(void * m)
+{
+ if (m)
+ free(m);
+ pthread_setspecific(errbuf_pkey, NULL);
+}
+
+static void
+_xc_init_errbuf(void)
+{
+ pthread_key_create(&errbuf_pkey, _xc_clean_errbuf);
+}
+
char *safe_strerror(int errcode)
{
- static __thread char errbuf[32];
+#define XS_BUFSIZE 32
+ char *errbuf;
static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
char *strerror_str;
+ pthread_once(&errbuf_pkey_once, _xc_init_errbuf);
+
+ errbuf = pthread_getspecific(errbuf_pkey);
+ if (errbuf == NULL) {
+ errbuf = malloc(XS_BUFSIZE);
+ pthread_setspecific(errbuf_pkey, errbuf);
+ }
+
/*
* Thread-unsafe strerror() is protected by a local mutex. We copy
* the string to a thread-private buffer before releasing the mutex.
*/
pthread_mutex_lock(&mutex);
strerror_str = strerror(errcode);
- strncpy(errbuf, strerror_str, sizeof(errbuf));
- errbuf[sizeof(errbuf)-1] = '\0';
+ strncpy(errbuf, strerror_str, XS_BUFSIZE);
+ errbuf[XS_BUFSIZE-1] = '\0';
pthread_mutex_unlock(&mutex);
return errbuf;
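For completeness: the __thread -> pthread_key conversion in patch-ad
above is just the usual pthread_once()/pthread_key_create() idiom for
per-thread data, which is what _xc_get_last_error() and safe_strerror()
end up doing. A stripped-down sketch of the pattern, with made-up names:

#include <pthread.h>
#include <stdlib.h>

static pthread_key_t buf_key;
static pthread_once_t buf_once = PTHREAD_ONCE_INIT;

/* Destructor: run at thread exit on the thread's value, if non-NULL. */
static void
buf_destroy(void *p)
{
	free(p);
}

static void
buf_init(void)
{
	pthread_key_create(&buf_key, buf_destroy);
}

/* Return this thread's private buffer, allocating it on first use. */
static char *
thread_buf(void)
{
	char *p;

	pthread_once(&buf_once, buf_init);
	p = pthread_getspecific(buf_key);
	if (p == NULL) {
		p = calloc(1, 64);
		pthread_setspecific(buf_key, p);
	}
	return p;
}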