tech-crypto archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

VIA C3 probe wrong; VIA C3 AES support broken?



The patch below does four things:

        1) It fixes cpu_probe_c3 so it uses the correct CPU
           family and thus actually matches C3 and C7 (and possibly
           Nano) CPUs.

        2) It enables the crypto features of these processors if they
           are not already enabled.

        3) It attaches the VIA RNG as a source of entropy.

        4) It changes the padlock OCF driver so that it registers
           as type "software", not type "hardware", and thus doesn't
           pointlessly suck crypto operations into the kernel -- the
           PadLock instructions are not privileged and can be used
           directly by OpenSSL applications via the "padlock" engine.

Unfortunately, the resulting kernel has dysfunctional FAST_IPSEC ESP; the
PadLock driver seems to get both the AES encryption and decryption of
packets wrong.  Since the PadLock code hasn't actually been enabled on
anyone's systems in some time (because of the identcpu bug), I suspect it
has not worked for a while.

The same kernel built without "options VIA_PADLOCK" does ESP fine via the
cryptosoft backend.  I put a bunch of work into this, but I am out
of time (have to do actual, you know, "job" type work), so if anyone
can see what's wrong with the VIA AES driver I'd sure appreciate it!

Index: arch/i386/i386/machdep.c
===================================================================
RCS file: /cvsroot/src/sys/arch/i386/i386/machdep.c,v
retrieving revision 1.644.4.9
diff -u -r1.644.4.9 machdep.c
--- arch/i386/i386/machdep.c    2 Mar 2009 20:09:04 -0000       1.644.4.9
+++ arch/i386/i386/machdep.c    31 Mar 2009 06:04:53 -0000
@@ -249,6 +249,7 @@
 unsigned int cpu_feature;
 unsigned int cpu_feature2;
 unsigned int cpu_feature_padlock;
+
 int    cpu_class;
 int    i386_fpu_present;
 int    i386_fpu_exception;
Index: arch/x86/include/via_padlock.h
===================================================================
RCS file: /cvsroot/src/sys/arch/x86/include/via_padlock.h,v
retrieving revision 1.2
diff -u -r1.2 via_padlock.h
--- arch/x86/include/via_padlock.h      16 Apr 2008 16:06:51 -0000      1.2
+++ arch/x86/include/via_padlock.h      31 Mar 2009 06:04:54 -0000
@@ -23,6 +23,8 @@
 
 #ifdef _KERNEL
 
+#include <sys/rnd.h>
+#include <sys/callout.h>
 #include <crypto/rijndael/rijndael.h>
 
 /* VIA C3 xcrypt-* instruction context control options */
@@ -55,6 +57,10 @@
        uint8_t op_iv[16];      /* 128 bit aligned */
        void            *op_buf;
 
+       int                     sc_rnd_hz;
+       struct callout          sc_rnd_co;
+       rndsource_element_t     sc_rnd_source;
+
        /* normal softc stuff */
        int32_t         sc_cid;
        int             sc_nsessions;
@@ -64,6 +70,8 @@
 #define VIAC3_SESSION(sid)     ((sid) & 0x0fffffff)
 #define VIAC3_SID(crd,ses)     (((crd) << 28) | ((ses) & 0x0fffffff))
 
+#define VIAC3_RNG_BUFSIZ       16
+
 struct cpu_info;
 
 struct via_padlock {
Index: arch/x86/x86/identcpu.c
===================================================================
RCS file: /cvsroot/src/sys/arch/x86/x86/identcpu.c,v
retrieving revision 1.10.4.2
diff -u -r1.10.4.2 identcpu.c
--- arch/x86/x86/identcpu.c     2 Feb 2009 18:50:01 -0000       1.10.4.2
+++ arch/x86/x86/identcpu.c     31 Mar 2009 06:04:54 -0000
@@ -480,7 +480,7 @@
        struct x86_cache_info *cai;
 
        if (cpu_vendor != CPUVENDOR_IDT ||
-           CPUID2FAMILY(ci->ci_signature) != 5)
+           CPUID2FAMILY(ci->ci_signature) < 6)
                return;
 
        family = CPUID2FAMILY(ci->ci_signature);
@@ -497,25 +497,61 @@
                ci->ci_feature_flags |= descs[3];
        }
 
-       if (model >= 0x9) {
+       if (family > 6 || model > 0x9 || (model == 0x9 && stepping >= 3)) {
                /* Nehemiah or Esther */
                x86_cpuid(0xc0000000, descs);
                lfunc = descs[0];
                if (lfunc >= 0xc0000001) {      /* has ACE, RNG */
-                       x86_cpuid(0xc0000001, descs);
-                       lfunc = descs[3];
-                       if (model > 0x9 || stepping >= 8) {     /* ACE */
-                               if (lfunc & CPUID_VIA_HAS_ACE) {
-                                       ci->ci_padlock_flags = lfunc;
-                                       if ((lfunc & CPUID_VIA_DO_ACE) == 0) {
-                                               msr = rdmsr(MSR_VIA_ACE);
-                                               wrmsr(MSR_VIA_ACE, msr |
-                                                   MSR_VIA_ACE_ENABLE);
-                                               ci->ci_padlock_flags |=
-                                                   CPUID_VIA_DO_ACE;
-                                       }
-                               }
+                   int rng_enable = 0, ace_enable = 0;
+                   x86_cpuid(0xc0000001, descs);
+                   lfunc = descs[3];
+                   ci->ci_padlock_flags = lfunc;
+                   /* Check for and enable RNG */
+                   if (lfunc & CPUID_VIA_HAS_RNG) {
+                       if (!(lfunc & CPUID_VIA_DO_RNG)) {
+                           rng_enable++;
+                           ci->ci_padlock_flags |= CPUID_VIA_HAS_RNG;
+                       }
+                   }
+                   /* Check for and enable ACE (AES-CBC) */
+                   if (lfunc & CPUID_VIA_HAS_ACE) {
+                       if (!(lfunc & CPUID_VIA_DO_ACE)) {
+                           ace_enable++;
+                           ci->ci_padlock_flags |= CPUID_VIA_DO_ACE;
+                       }
+                   }
+                   /* Check for and enable SHA */
+                   if (lfunc & CPUID_VIA_HAS_PHE) {
+                       if (!(lfunc & CPUID_VIA_DO_PHE)) {
+                           ace_enable++;
+                           ci->ci_padlock_flags |= CPUID_VIA_DO_PHE;
                        }
+                   }
+                   /* Check for and enable ACE2 (AES-CTR) */
+                   if (lfunc & CPUID_VIA_HAS_ACE2) {
+                       if (!(lfunc & CPUID_VIA_DO_ACE2)) {
+                           ace_enable++;
+                           ci->ci_padlock_flags |= CPUID_VIA_DO_ACE2;
+                       }
+                   }
+                   /* Check for and enable PMM (modmult engine) */
+                   if (lfunc & CPUID_VIA_HAS_PMM) {
+                       if (!(lfunc & CPUID_VIA_DO_PMM)) {
+                           ace_enable++;
+                           ci->ci_padlock_flags |= CPUID_VIA_DO_PMM;
+                       }
+                   }
+
+                   /* Actually do the enables. */
+                   if (rng_enable) {
+                       msr = rdmsr(MSR_VIA_RNG);
+                       wrmsr(MSR_VIA_RNG, msr | MSR_VIA_RNG_ENABLE);
+                   }
+                   if (ace_enable) {
+                       msr = rdmsr(MSR_VIA_ACE);
+                       wrmsr(MSR_VIA_ACE, msr | MSR_VIA_ACE_ENABLE);
+                   }
+                       
                }
        }
 
@@ -543,7 +579,7 @@
        cai->cai_totalsize = VIA_L1_ECX_DC_SIZE(descs[2]);
        cai->cai_associativity = VIA_L1_ECX_DC_ASSOC(descs[2]);
        cai->cai_linesize = VIA_L1_EDX_IC_LS(descs[2]);
-       if (model == 9 && stepping == 8) {
+       if (family == 6 && model == 9 && stepping == 8) {
                /* Erratum: stepping 8 reports 4 when it should be 2 */
                cai->cai_associativity = 2;
        }
@@ -552,11 +588,11 @@
        cai->cai_totalsize = VIA_L1_EDX_IC_SIZE(descs[3]);
        cai->cai_associativity = VIA_L1_EDX_IC_ASSOC(descs[3]);
        cai->cai_linesize = VIA_L1_EDX_IC_LS(descs[3]);
-       if (model == 9 && stepping == 8) {
+       if (family == 6 && model == 9 && stepping == 8) {
                /* Erratum: stepping 8 reports 4 when it should be 2 */
                cai->cai_associativity = 2;
        }
-
+       
        /*
         * Determine L2 cache/TLB info.
         */
@@ -568,7 +604,7 @@
        x86_cpuid(0x80000006, descs);
 
        cai = &ci->ci_cinfo[CAI_L2CACHE];
-       if (model >= 9) {
+       if (family > 6 || model >= 9) {
                cai->cai_totalsize = VIA_L2N_ECX_C_SIZE(descs[2]);
                cai->cai_associativity = VIA_L2N_ECX_C_ASSOC(descs[2]);
                cai->cai_linesize = VIA_L2N_ECX_C_LS(descs[2]);
Index: arch/x86/x86/via_padlock.c
===================================================================
RCS file: /cvsroot/src/sys/arch/x86/x86/via_padlock.c,v
retrieving revision 1.9
diff -u -r1.9 via_padlock.c
--- arch/x86/x86/via_padlock.c  16 Apr 2008 16:06:52 -0000      1.9
+++ arch/x86/x86/via_padlock.c  31 Mar 2009 06:04:55 -0000
@@ -22,6 +22,12 @@
 #include <sys/cdefs.h>
 __KERNEL_RCSID(0, "$NetBSD: via_padlock.c,v 1.9 2008/04/16 16:06:52 cegger Exp 
$");
 
+#include "rnd.h"
+
+#if NRND == 0
+#error padlock requires rnd pseudo-devices
+#endif
+
 #include "opt_viapadlock.h"
 
 #include <sys/param.h>
@@ -32,6 +38,7 @@
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/cpu.h>
+#include <sys/rnd.h>
 
 #include <x86/specialreg.h>
 
@@ -46,6 +53,8 @@
 
 #ifdef VIA_PADLOCK
 
+char   xxx_via_buffer[1024];
+
 int    via_padlock_crypto_newsession(void *, uint32_t *, struct cryptoini *);
 int    via_padlock_crypto_process(void *, struct cryptop *, int);
 int    via_padlock_crypto_swauth(struct cryptop *, struct cryptodesc *,
@@ -56,19 +65,69 @@
 static __inline void via_padlock_cbc(void *, void *, void *, void *, int,
            void *);
 
-void
-via_padlock_attach(void)
+static void
+via_c3_rnd(void *arg)
 {
-#define VIA_ACE (CPUID_VIA_HAS_ACE|CPUID_VIA_DO_ACE)
-       if ((cpu_feature_padlock & VIA_ACE) != VIA_ACE)
-               return;
+       struct via_padlock_softc *vp_sc = arg;
 
-       struct via_padlock_softc *vp_sc;
-       if ((vp_sc = malloc(sizeof(*vp_sc), M_DEVBUF, M_NOWAIT)) == NULL)
-               return;
-       memset(vp_sc, 0, sizeof(*vp_sc));
+       unsigned int rv, creg0, len = VIAC3_RNG_BUFSIZ;
+       static uint32_t buffer[VIAC3_RNG_BUFSIZ + 2];   /* XXX 2? */
+
+       /*
+        * Sadly, we have to monkey with the coprocessor enable
+        * registers, which are really for the FPU, in order to read
+        * from the RNG.  This supposedly does not and can not clobber
+        * the FPU state, stack, or SIMD registers.
+        */
+       kpreempt_disable();
+       x86_disable_intr();
+       creg0 = rcr0(); 
+       lcr0(creg0 & ~(CR0_EM|CR0_TS)); /* Permit access to SIMD/FPU path */
+       /*
+        * Collect the random data from the C3 RNG into our buffer.
+        * We turn on maximum whitening (is this actually desirable
+        * if we will feed the data to SHA1?) (%edx[0,1] = "11").
+        */
+       __asm __volatile("rep xstorerng"
+                        : "=a" (rv) : "d" (3), "D" (buffer),
+                        "c" (len * sizeof(int)) : "memory", "cc");
+       /* Put CR0 back how it was */
+       lcr0(creg0);
+       x86_enable_intr();
+       kpreempt_enable();
+       rnd_add_data(&vp_sc->sc_rnd_source, buffer, len * sizeof(int),
+                    len * sizeof(int));
+       callout_reset(&vp_sc->sc_rnd_co, vp_sc->sc_rnd_hz, via_c3_rnd, vp_sc);
+}      
+
+static void
+via_c3_rnd_init(struct via_padlock_softc *const vp_sc)
+{
+       if (hz >= 100) {
+           vp_sc->sc_rnd_hz = 10 * hz / 100;
+       } else {
+           vp_sc->sc_rnd_hz = 10;
+       }
+       /* See hifn7751.c re use of RND_FLAG_NO_ESTIMATE */
+       rnd_attach_source(&vp_sc->sc_rnd_source, "padlock",
+                         RND_TYPE_RNG, RND_FLAG_NO_ESTIMATE);
+       callout_init(&vp_sc->sc_rnd_co, 0);
+       /* Call once to prime the pool early and set callout. */
+       via_c3_rnd(vp_sc);
+}
 
-       vp_sc->sc_cid = crypto_get_driverid(0);
+static void
+via_c3_ace_init(struct via_padlock_softc *const vp_sc)
+{
+       /*
+        * There is no reason to call into the kernel to use this
+        * driver from userspace, because the crypto instructions can
+        * be directly accessed there.  Setting CRYPTOCAP_F_SOFTWARE
+        * has approximately the right semantics though the name is
+        * confusing (however, consider that crypto via unprivileged
+        * instructions _is_ "just software" in some sense).
+        */
+       vp_sc->sc_cid = crypto_get_driverid(CRYPTOCAP_F_SOFTWARE);
        if (vp_sc->sc_cid < 0) {
                printf("PadLock: Could not get a crypto driver ID\n");
                free(vp_sc, M_DEVBUF);
@@ -94,8 +153,36 @@
        REGISTER(CRYPTO_RIPEMD160_HMAC_96);
        REGISTER(CRYPTO_RIPEMD160_HMAC);
        REGISTER(CRYPTO_SHA2_HMAC);
+}
+
+void
+via_padlock_attach(void)
+{
+       struct via_padlock_softc *vp_sc;
 
-       printf("PadLock: registered support for AES_CBC\n");
+       printf("%s", xxx_via_buffer);
+
+       if (!((cpu_feature_padlock & CPUID_VIA_HAS_ACE) ||
+             (cpu_feature_padlock & CPUID_VIA_HAS_RNG))) {
+               printf("PadLock: Nothing (%08x ! %08X ! %08X)\n",
+                       cpu_feature_padlock, CPUID_VIA_HAS_ACE,
+                       CPUID_VIA_HAS_RNG);
+               return;         /* Nothing to see here, move along. */
+       }
+
+       if ((vp_sc = malloc(sizeof(*vp_sc), M_DEVBUF, M_NOWAIT)) == NULL)
+               return;
+       memset(vp_sc, 0, sizeof(*vp_sc));
+
+       if (cpu_feature_padlock & CPUID_VIA_HAS_RNG) {
+               via_c3_rnd_init(vp_sc);
+               printf("PadLock: RNG attached\n");
+       }
+
+       if (cpu_feature_padlock & CPUID_VIA_HAS_ACE) {
+               via_c3_ace_init(vp_sc);
+               printf("PadLock: AES-CBC attached\n");
+       }
 }
 
 int


Home | Main Index | Thread Index | Old Index