Source-Changes-HG archive


[src/trunk]: src/sys/arch/xen/xen [merging from cherry-xenmp]



details:   https://anonhg.NetBSD.org/src/rev/fa2bd8a0bae2
branches:  trunk
changeset: 771284:fa2bd8a0bae2
user:      cherry <cherry%NetBSD.org@localhost>
date:      Fri Nov 18 06:01:50 2011 +0000

description:
[merging from cherry-xenmp]
 - Make clock MP aware.
 - Bring in fixes that bouyer@ brought in via:
   cvs rdiff -u -r1.54.6.4 -r1.54.6.5 src/sys/arch/xen/xen/clock.c

Thanks to riz@ for testing on dom0

diffstat:

 sys/arch/xen/xen/clock.c |  346 ++++++++++++++++++++++++++--------------------
 1 files changed, 196 insertions(+), 150 deletions(-)
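
The heart of the change, visible in the diff below, is that the single set of global shadow_* time variables becomes an array of per-CPU shadow copies, and callers serialize with a mutex (tmutex) instead of raising splhigh().  The stand-alone C sketch below only illustrates that pattern: struct shadow, ci_shadow, tmutex and MAXCPUS mirror names from the diff, while refresh_shadow(), read_cpu_time() and the pthread calls are invented stand-ins for the kernel primitives, not the committed implementation.

/*
 * Sketch: one shadow time copy per CPU instead of a single global set,
 * guarded by a mutex rather than splhigh().  pthread_mutex_* stands in
 * for the kernel's kmutex_* here.
 */
#include <pthread.h>
#include <stdint.h>
#include <time.h>

#define MAXCPUS 8                       /* stand-in for the kernel constant */

struct shadow {
        uint64_t tsc_stamp;             /* TSC value at the last Xen update */
        uint64_t system_time;           /* Xen system time at that update (ns) */
        unsigned long time_version;     /* version counter from vcpu_time_info */
        uint32_t freq_mul;              /* TSC -> ns scale factor */
        int8_t freq_shift;
        struct timespec ts;             /* wall-clock part from shared_info */
};

static struct shadow ci_shadow[MAXCPUS];        /* per-CPU shadow copies */
static pthread_mutex_t tmutex = PTHREAD_MUTEX_INITIALIZER;

/* Stub for get_time_values_from_xen(): refresh one CPU's shadow copy. */
static void
refresh_shadow(int cpu)
{
        struct shadow *sh = &ci_shadow[cpu];

        /*
         * The real function copies Xen's vcpu_time_info and the wall-clock
         * fields of shared_info, retrying until the version is stable.
         */
        clock_gettime(CLOCK_MONOTONIC, &sh->ts);
        sh->time_version += 2;
}

/* How a caller reads per-CPU time under the new locking scheme. */
static struct timespec
read_cpu_time(int cpu)
{
        struct timespec ts;

        pthread_mutex_lock(&tmutex);            /* was: s = splhigh(); */
        refresh_shadow(cpu);
        ts = ci_shadow[cpu].ts;
        pthread_mutex_unlock(&tmutex);          /* was: splx(s); */
        return ts;
}

int
main(void)
{
        struct timespec now = read_cpu_time(0);

        (void)now;
        return 0;
}

Keeping ci_shadow indexed per CPU lets each vCPU track its own copy of Xen's published time, while the single tmutex keeps updates of the shadow copies and of xen_clock_bias consistent, as noted in the comment above the tmutex declaration in the diff.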

diffs (truncated from 519 to 300 lines):

diff -r 963a9acd199d -r fa2bd8a0bae2 sys/arch/xen/xen/clock.c
--- a/sys/arch/xen/xen/clock.c  Fri Nov 18 04:20:16 2011 +0000
+++ b/sys/arch/xen/xen/clock.c  Fri Nov 18 06:01:50 2011 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: clock.c,v 1.56 2011/09/20 00:12:24 jym Exp $   */
+/*     $NetBSD: clock.c,v 1.57 2011/11/18 06:01:50 cherry Exp $        */
 
 /*
  *
@@ -29,7 +29,7 @@
 #include "opt_xen.h"
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: clock.c,v 1.56 2011/09/20 00:12:24 jym Exp $");
+__KERNEL_RCSID(0, "$NetBSD: clock.c,v 1.57 2011/11/18 06:01:50 cherry Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -43,6 +43,7 @@
 #include <xen/xen.h>
 #include <xen/hypervisor.h>
 #include <xen/evtchn.h>
+#include <xen/xen3-public/vcpu.h>
 #include <machine/cpu_counter.h>
 
 #include <dev/clock_subr.h>
@@ -66,22 +67,32 @@
 };
 
 /* These are periodically updated in shared_info, and then copied here. */
-static volatile uint64_t shadow_tsc_stamp;
-static volatile uint64_t shadow_system_time;
-static volatile unsigned long shadow_time_version; /* XXXSMP */
-static volatile uint32_t shadow_freq_mul;
-static volatile int8_t shadow_freq_shift;
-static volatile struct timespec shadow_ts;
+struct shadow {
+       uint64_t tsc_stamp;
+       uint64_t system_time;
+       unsigned long time_version; /* XXXSMP */
+       uint32_t freq_mul;
+       int8_t freq_shift;
+       struct timespec ts;
+};
 
-/* The time when the last hardclock(9) call should have taken place. */
-static volatile uint64_t processed_system_time;
+/* Protects volatile variables ci_shadow & xen_clock_bias */
+static kmutex_t tmutex;
+
+/* Per CPU shadow time values */
+static volatile struct shadow ci_shadow[MAXCPUS];
+
+/* The time when the last hardclock(9) call should have taken place,
+ * per cpu.
+ */
+static volatile uint64_t vcpu_system_time[MAXCPUS];
 
 /*
  * The clock (as returned by xen_get_timecount) may need to be held
  * back to maintain the illusion that hardclock(9) was called when it
  * was supposed to be, not when Xen got around to scheduling us.
  */
-static volatile uint64_t xen_clock_bias = 0;
+static volatile uint64_t xen_clock_bias[MAXCPUS];
 
 #ifdef DOM0OPS
 /* If we're dom0, send our time to Xen every minute or so. */
@@ -96,25 +107,30 @@
  * area.  Must be called at splhigh (per timecounter requirements).
  */
 static void
-get_time_values_from_xen(void)
+get_time_values_from_xen(struct cpu_info *ci)
 {
-       volatile struct vcpu_time_info *t = &curcpu()->ci_vcpu->time;
+
+       volatile struct shadow *shadow = &ci_shadow[ci->ci_cpuid];
+
+       volatile struct vcpu_time_info *t = &ci->ci_vcpu->time;
        uint32_t tversion;
 
+       KASSERT(mutex_owned(&tmutex));
+
        do {
-               shadow_time_version = t->version;
+               shadow->time_version = t->version;
                xen_rmb();
-               shadow_tsc_stamp = t->tsc_timestamp;
-               shadow_system_time = t->system_time;
-               shadow_freq_mul = t->tsc_to_system_mul;
-               shadow_freq_shift = t->tsc_shift;
+               shadow->tsc_stamp = t->tsc_timestamp;
+               shadow->system_time = t->system_time;
+               shadow->freq_mul = t->tsc_to_system_mul;
+               shadow->freq_shift = t->tsc_shift;
                xen_rmb();
-       } while ((t->version & 1) || (shadow_time_version != t->version));
+       } while ((t->version & 1) || (shadow->time_version != t->version));
        do {
                tversion = HYPERVISOR_shared_info->wc_version;
                xen_rmb();
-               shadow_ts.tv_sec = HYPERVISOR_shared_info->wc_sec;
-               shadow_ts.tv_nsec = HYPERVISOR_shared_info->wc_nsec;
+               shadow->ts.tv_sec = HYPERVISOR_shared_info->wc_sec;
+               shadow->ts.tv_nsec = HYPERVISOR_shared_info->wc_nsec;
                xen_rmb();
        } while ((HYPERVISOR_shared_info->wc_version & 1) ||
            (tversion != HYPERVISOR_shared_info->wc_version));
@@ -124,12 +140,17 @@
  * Are the values we have up to date?
  */
 static inline int
-time_values_up_to_date(void)
+time_values_up_to_date(struct cpu_info *ci)
 {
        int rv;
 
+       volatile struct shadow *shadow = &ci_shadow[ci->ci_cpuid];
+
+       KASSERT(ci != NULL);
+       KASSERT(mutex_owned(&tmutex));
+
        xen_rmb();
-       rv = shadow_time_version == curcpu()->ci_vcpu->time.version;
+       rv = shadow->time_version == ci->ci_vcpu->time.version;
        xen_rmb();
 
        return rv;
@@ -164,52 +185,40 @@
  * Must be called at splhigh (per timecounter requirements).
  */
 static uint64_t
-get_tsc_offset_ns(void)
+get_tsc_offset_ns(struct cpu_info *ci)
 {
        uint64_t tsc_delta, offset;
+       volatile struct shadow *shadow = &ci_shadow[ci->ci_cpuid];
 
-       tsc_delta = cpu_counter() - shadow_tsc_stamp;
-       offset = scale_delta(tsc_delta, shadow_freq_mul,
-           shadow_freq_shift);
-#ifdef XEN_CLOCK_DEBUG
-       if (tsc_delta > 100000000000ULL || offset > 10000000000ULL)
-               printf("get_tsc_offset_ns: tsc_delta=%llu offset=%llu"
-                   " pst=%llu sst=%llu\n", tsc_delta, offset,
-                   processed_system_time, shadow_system_time);
-#endif
+       KASSERT(mutex_owned(&tmutex));
+       tsc_delta = cpu_counter() - shadow->tsc_stamp;
+       offset = scale_delta(tsc_delta, shadow->freq_mul,
+           shadow->freq_shift);
 
        return offset;
 }
 
 /*
- * Returns the current system_time, taking care that the timestamp
- * used is valid for the TSC measurement in question.  Xen2 doesn't
- * ensure that this won't step backwards, so we enforce monotonicity
- * on our own in that case.  Must be called at splhigh.
+ * Returns the current system_time on given vcpu, taking care that the
+ * timestamp used is valid for the TSC measurement in question.  Xen2
+ * doesn't ensure that this won't step backwards, so we enforce
+ * monotonicity on our own in that case.  Must be called at splhigh.
  */
 static uint64_t
-get_system_time(void)
+get_vcpu_time(struct cpu_info *ci)
 {
        uint64_t offset, stime;
+       volatile struct shadow *shadow = &ci_shadow[ci->ci_cpuid];
        
-       for (;;) {
-               offset = get_tsc_offset_ns();
-               stime = shadow_system_time + offset;
                
+       KASSERT(mutex_owned(&tmutex));
+       do {
+               get_time_values_from_xen(ci);
+               offset = get_tsc_offset_ns(ci);
+               stime = shadow->system_time + offset;
                /* if the timestamp went stale before we used it, refresh */
-               if (time_values_up_to_date()) {
-                       /*
-                        * Work around an intermittent Xen2 bug where, for
-                        * a period of 1<<32 ns, currently running domains
-                        * don't get their timer events as usual (and also
-                        * aren't preempted in favor of other runnable
-                        * domains).  Setting the timer into the past in
-                        * this way causes it to fire immediately.
-                        */
-                       break;
-               }
-               get_time_values_from_xen();
-       }
+
+       } while (!time_values_up_to_date(ci));
 
        return stime;
 }
@@ -218,16 +227,22 @@
 xen_wall_time(struct timespec *wt)
 {
        uint64_t nsec;
-       int s;
+
+       struct cpu_info *ci = curcpu();
+       volatile struct shadow *shadow = &ci_shadow[ci->ci_cpuid];
 
-       s = splhigh();
-       get_time_values_from_xen();
-       *wt = shadow_ts;
-       nsec = wt->tv_nsec;
+       mutex_enter(&tmutex);
+       do {
+               /*
+                * Under Xen3, shadow->ts is the wall time less system time
+                * get_vcpu_time() will update shadow
+                */
+               nsec = get_vcpu_time(curcpu());
+               *wt = shadow->ts;
+               nsec += wt->tv_nsec;
+       } while (!time_values_up_to_date(ci));
+       mutex_exit(&tmutex);
 
-       /* Under Xen3, this is the wall time less system time */
-       nsec += get_system_time();
-       splx(s);
        wt->tv_sec += nsec / 1000000000L;
        wt->tv_nsec = nsec % 1000000000L;
 }
@@ -253,8 +268,6 @@
 #else
        xen_platform_op_t op;
 #endif
-       int s;
-
        if (xendomain_is_privileged()) {
                /* needs to set the RTC chip too */
                struct clock_ymdhms dt;
@@ -269,9 +282,9 @@
                /* XXX is rtc_offset handled correctly everywhere? */
                op.u.settime.secs        = tvp->tv_sec;
                op.u.settime.nsecs       = tvp->tv_usec * 1000;
-               s = splhigh();
-               op.u.settime.system_time = get_system_time();
-               splx(s);
+               mutex_enter(&tmutex);
+               op.u.settime.system_time = get_vcpu_time(curcpu());
+               mutex_exit(&tmutex);
 #if __XEN_INTERFACE_VERSION__ < 0x00030204
                return HYPERVISOR_dom0_op(&op);
 #else
@@ -300,14 +313,16 @@
 void
 xen_delay(unsigned int n)
 {
+       struct cpu_info *ci = curcpu();
+       volatile struct shadow *shadow = &ci_shadow[ci->ci_cpuid];
+
        if (n < 500000) {
                /*
-                * shadow_system_time is updated every hz tick, it's not
+                * shadow->system_time is updated every hz tick, it's not
                 * precise enough for short delays. Use the CPU counter
                 * instead. We assume it's working at this point.
                 */
                uint64_t cc, cc2, when;
-               struct cpu_info *ci = curcpu();
 
                cc = cpu_counter();
                when = cc + (uint64_t)n * cpu_frequency(ci) / 1000000LL;
@@ -324,18 +339,18 @@
                return;
        } else {
                uint64_t when;
-               int s;
-               /* for large delays, shadow_system_time is OK */
-               
-               s = splhigh();
-               get_time_values_from_xen();
-               when = shadow_system_time + n * 1000;
-               while (shadow_system_time < when) {
-                       splx(s);
-                       s = splhigh();
-                       get_time_values_from_xen();
+
+               /* for large delays, shadow->system_time is OK */
+               mutex_enter(&tmutex);
+               get_time_values_from_xen(ci);
+               when = shadow->system_time + n * 1000;
+               while (shadow->system_time < when) {
+                       mutex_exit(&tmutex);
+                       HYPERVISOR_yield();
+                       mutex_enter(&tmutex);
+                       get_time_values_from_xen(ci);
                }
-               splx(s);
+               mutex_exit(&tmutex);
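
One piece of the old code that the merge keeps, now applied per CPU, is the version-check loop in get_time_values_from_xen() above: it is a seqlock-style reader, where an odd version number means the publisher is mid-update and a changed version means the copy must be retried.  A minimal stand-alone sketch of that pattern follows; the names are invented and __sync_synchronize() merely stands in for xen_rmb().

/*
 * Seqlock-style reader: retry while the writer's version counter is odd
 * or changed during the copy.  Illustration only, not kernel code.
 */
#include <stdint.h>
#include <stdio.h>

struct time_info {
        volatile uint32_t version;      /* odd while the writer is updating */
        volatile uint64_t system_time;  /* value being published */
};

static uint64_t
read_stable(volatile struct time_info *t)
{
        uint32_t v;
        uint64_t st;

        do {
                v = t->version;
                __sync_synchronize();   /* read barrier, like xen_rmb() */
                st = t->system_time;
                __sync_synchronize();
        } while ((t->version & 1) != 0 || v != t->version);

        return st;
}

int
main(void)
{
        struct time_info ti = { .version = 2, .system_time = 123456789 };

        printf("%llu\n", (unsigned long long)read_stable(&ti));
        return 0;
}

The same read loop appears twice in the new get_time_values_from_xen(): once for the per-vCPU time_info and once for the wall-clock fields in HYPERVISOR_shared_info.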


