tech-kern archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
Re: Patch: cprng_fast performance - please review.
On Wed, Apr 16, 2014 at 09:52:22PM -0400, Thor Lancelot Simon wrote:
>
> Attached is a patch which makes cprng_fast per-CPU and lockless. *IT IS NOT
> WELL TESTED YET (I haven't even run test vectors) AND IS ONLY FOR REVIEW.*
New diff, with some missing files and incorporating some more comments
from Taylor.
Thor
? kern/.init_main.c.swp
? sys/.cprng.h.swo
Index: conf/files
===================================================================
RCS file: /cvsroot/src/sys/conf/files,v
retrieving revision 1.1090
diff -u -p -r1.1090 files
--- conf/files 1 Apr 2014 17:49:30 -0000 1.1090
+++ conf/files 17 Apr 2014 03:17:18 -0000
@@ -160,6 +160,7 @@ include "crypto/cast128/files.cast128"
include "crypto/rijndael/files.rijndael"
include "crypto/skipjack/files.skipjack"
include "crypto/camellia/files.camellia"
+include "crypto/hc128/files.hc128"
# General-purpose crypto processing framework.
include "opencrypto/files.opencrypto"
Index: crypto/hc128/files.hc128
===================================================================
RCS file: crypto/hc128/files.hc128
diff -N crypto/hc128/files.hc128
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ crypto/hc128/files.hc128 17 Apr 2014 03:17:18 -0000
@@ -0,0 +1,5 @@
+# $NetBSD: $
+
+define hc128
+
+file crypto/hc128/hc128.c
Index: crypto/hc128/hc128.c
===================================================================
RCS file: crypto/hc128/hc128.c
diff -N crypto/hc128/hc128.c
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ crypto/hc128/hc128.c 17 Apr 2014 03:17:18 -0000
@@ -0,0 +1,162 @@
+/* $NetBSD: $ */
+
+/* Author: Lucas Clemente Vella
+ * Source code placed into public domain. */
+
+/*
+ * This is the HC-128 stream cipher, one of the eStream Profile 1
+ * selected ciphers. It is based on SHA-256.
+ *
+ * This cipher has a very large key setup time (estimated at 74,000
+ * cycles on a modern x86 CPU) but is quite efficient once keyed:
+ * around 3 cycles per byte. Since it produces a stream of 32-bit
+ * values, it is well suited for use as a kernel RNG, usually requiring
+ * no output buffering and wasting little of the output stream.
+ */
+
+#include <crypto/hc128/hc128.h>
+
+static inline uint32_t
+rotl(uint32_t x, unsigned int n)
+{
+ return (x << n) | (x >> (32-n));
+}
+
+static inline unsigned int
+m512(unsigned int x)
+{
+ static const unsigned int mask = 0x1ff; /* 511 mask, for mod 512 */
+ return x & mask;
+}
+
+static inline uint32_t
+f1(uint32_t x)
+{
+ return rotl(x, 25) ^ rotl(x, 14) ^ (x >> 3);
+}
+
+static inline uint32_t
+f2(uint32_t x)
+{
+ return rotl(x, 15) ^ rotl(x, 13) ^ (x >> 10);
+}
+
+static inline uint32_t
+g1(uint32_t x, uint32_t y, uint32_t z)
+{
+ return (rotl(x, 22) ^ rotl(z, 9)) + rotl(y, 24);
+}
+
+static inline uint32_t
+g2(uint32_t x, uint32_t y, uint32_t z)
+{
+ return (rotl(x, 10) ^ rotl(z, 23)) + rotl(y, 8);
+}
+
+static inline uint32_t
+h(const uint32_t *qp, uint32_t x)
+{
+ return qp[x & 0xFFu] + qp[256 + ((x >> 16) & 0xFFu)];
+}
+
+static inline uint32_t
+round_expression(uint32_t *pq, const uint32_t *qp,
+ uint32_t (*g)(uint32_t x, uint32_t y, uint32_t z),
+ uint16_t i)
+{
+ pq[i] += g(pq[m512(i-3u)], pq[m512(i-10u)], pq[m512(i+1u)]);
+ return pq[i] ^ h(qp, pq[m512(i-12u)]);
+}
+
+static inline uint32_t
+pack_littleendian(const uint8_t *v)
+{
+#if BYTE_ORDER == LITTLE_ENDIAN /* compare byte order; #ifdef only tests that the macro exists */
+ return *((const uint32_t*)v); /* direct load; relies on v being 4-byte aligned */
+#else
+ return (uint32_t)v[3] << 24 /* assemble byte-by-byte, v[0] least significant */
+ | (uint32_t)v[2] << 16
+ | (uint32_t)v[1] << 8
+ | (uint32_t)v[0];
+#endif
+}
+
+static inline void
+unpack_littleendian(uint32_t value, uint8_t *v)
+{
+#if BYTE_ORDER == LITTLE_ENDIAN
+ *((uint32_t*)v) = value;
+#else
+ int i;
+ for(i = 0; i < 4; ++i) {
+ v[i] = value >> (i * 8);
+ }
+#endif
+}
+
+/** Initialize HC-128 state with key and IV.
+ *
+ * Contrary to the other implemented algorithms, the key and IV are taken
+ * in a single function to initialize the state. This approach was chosen
+ * here because of the nature of the algorithm, that keeps no intermediate
+ * state between the key setting and the IV setting.
+ *
+ * Notice: an IV should never be reused.
+ *
+ * @param state The uninitialized state, it will be ready to
+ * encryption/decryption afterwards.
+ * @param key 16 bytes buffer containing the 128-bit key. The buffer must
+ * be aligned to at least 4 bytes (depending on the platform it may or may
+ * not work with unaligned memory).
+ * @param iv 16 bytes buffer containing the IV.
+ */
+void
+hc128_init(hc128_state_t *state, const uint8_t *key, const uint8_t *iv)
+{
+ unsigned int i;
+ uint32_t w[1280], *p = state->p, *q = state->q;
+
+ for(i = 0; i < 4; ++i) {
+ w[i] = w[i+4] = pack_littleendian(key + 4 * i);
+ w[i+8] = w[i+12] = pack_littleendian(iv + 4 * i);
+ }
+
+ for(i = 16; i < 1280; ++i) {
+ w[i] = f2(w[i-2]) + w[i-7] + f1(w[i-15]) + w[i-16] + i;
+ }
+
+ for(i = 0; i < 512; ++i) {
+ p[i] = w[i+256];
+ q[i] = w[i+768];
+ }
+
+ for(i = 0; i < 512; ++i) {
+ p[i] = round_expression(p, q, g1, i);
+ }
+
+ for(i = 0; i < 512; ++i) {
+ q[i] = round_expression(q, p, g2, i);
+ }
+
+ state->i = 0;
+}
+
+/** Performs one round of the algorithm.
+ *
+ * @param state The algorithm state.
+ * @param stream A 4 byte buffer where the generated stream will be stored.
+ * Must be aligned.
+ */
+void
+hc128_extract(hc128_state_t *state, uint8_t *stream)
+{
+ register uint32_t ret;
+
+ uint16_t i = state->i;
+ state->i = (i + 1u) & 1023u;
+
+ ret = (i < 512) ? round_expression(state->p, state->q, g1, i) :
+ round_expression(state->q, state->p, g2, m512(i));
+
+ unpack_littleendian(ret, stream);
+}
Index: crypto/hc128/hc128.h
===================================================================
RCS file: crypto/hc128/hc128.h
diff -N crypto/hc128/hc128.h
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ crypto/hc128/hc128.h 17 Apr 2014 03:17:18 -0000
@@ -0,0 +1,22 @@
+/* $NetBSD: $ */
+
+/* Author: Lucas Clemente Vella
+ * Source code placed into public domain. */
+
+#ifndef _CRYPTO_HC128_H_
+#define _CRYPTO_HC128_H_
+
+#include <sys/types.h>
+
+typedef struct
+{
+ uint32_t p[512];
+ uint32_t q[512];
+ uint16_t i;
+} hc128_state_t;
+
+void hc128_init(hc128_state_t *, const uint8_t *, const uint8_t *);
+
+void hc128_extract(hc128_state_t *, uint8_t *);
+
+#endif
Index: kern/init_main.c
===================================================================
RCS file: /cvsroot/src/sys/kern/init_main.c,v
retrieving revision 1.454.2.1
diff -u -p -r1.454.2.1 init_main.c
--- kern/init_main.c 7 Apr 2014 02:20:00 -0000 1.454.2.1
+++ kern/init_main.c 17 Apr 2014 03:17:19 -0000
@@ -497,6 +497,8 @@ main(void)
/* Initialize the kernel strong PRNG. */
kern_cprng = cprng_strong_create("kernel", IPL_VM,
CPRNG_INIT_ANY|CPRNG_REKEY_ANY);
+
+ cprng_fast_init();
/* Initialize interfaces. */
ifinit1();
Index: kern/subr_cprng.c
===================================================================
RCS file: /cvsroot/src/sys/kern/subr_cprng.c,v
retrieving revision 1.23
diff -u -p -r1.23 subr_cprng.c
--- kern/subr_cprng.c 17 Jan 2014 02:12:48 -0000 1.23
+++ kern/subr_cprng.c 17 Apr 2014 03:17:19 -0000
@@ -43,6 +43,7 @@ __KERNEL_RCSID(0, "$NetBSD: subr_cprng.c
#include <sys/kmem.h>
#include <sys/lwp.h>
#include <sys/once.h>
+#include <sys/percpu.h>
#include <sys/poll.h> /* XXX POLLIN/POLLOUT/&c. */
#include <sys/select.h>
#include <sys/systm.h>
@@ -54,6 +55,7 @@ __KERNEL_RCSID(0, "$NetBSD: subr_cprng.c
#endif
#include <crypto/nist_ctr_drbg/nist_ctr_drbg.h>
+#include <crypto/hc128/hc128.h>
#if defined(__HAVE_CPU_COUNTER)
#include <machine/cpu_counter.h>
@@ -72,6 +74,13 @@ static void cprng_strong_rngtest(struct
static rndsink_callback_t cprng_strong_rndsink_callback;
+percpu_t *percpu_cprng_fast_ctx;
+static int cprng_fast_initialized;
+
+static void cprng_fast_randrekey(cprng_fast_ctx_t *);
+
+void *cprng_fast_rekey_softintr = NULL;
+
void
cprng_init(void)
{
@@ -103,10 +112,11 @@ cprng_counter(void)
return cpu_counter32();
#endif
if (__predict_false(cold)) {
+ static int ctr;
/* microtime unsafe if clock not running yet */
- return 0;
+ return ctr++;
}
- microtime(&tv);
+ getmicrotime(&tv);
return (tv.tv_sec * 1000000 + tv.tv_usec);
}
@@ -532,8 +542,16 @@ sysctl_kern_urnd(SYSCTLFN_ARGS)
}
/*
- * sysctl helper routine for kern.arandom node. Picks a random number
- * for you.
+ * sysctl helper routine for kern.arandom node. Fills the supplied
+ * structure with random data for you.
+ *
+ * This node was originally declared as type "int" but its implementation
+ * in OpenBSD, whence it came, would happily return up to 8K of data if
+ * requested. Evidently this was used to key RC4 in userspace.
+ *
+ * In NetBSD, the libc stack-smash-protection code reads 64 bytes
+ * from here at every program startup. So though it would be nice
+ * to make this node return only 32 or 64 bits, we can't. Too bad!
*/
static int
sysctl_kern_arnd(SYSCTLFN_ARGS)
@@ -542,31 +560,145 @@ sysctl_kern_arnd(SYSCTLFN_ARGS)
void *v;
struct sysctlnode node = *rnode;
- if (*oldlenp == 0)
+ switch (*oldlenp) {
+ case 0:
return 0;
+ default:
+ if (*oldlenp > 256) {
+ return E2BIG;
+ }
+ v = kmem_alloc(*oldlenp, KM_SLEEP);
+ cprng_fast(v, *oldlenp);
+ node.sysctl_data = v;
+ node.sysctl_size = *oldlenp;
+ error = sysctl_lookup(SYSCTLFN_CALL(&node));
+ kmem_free(v, *oldlenp);
+ return error;
+ }
+}
+
+static void
+cprng_fast_randrekey(cprng_fast_ctx_t *ctx)
+{
+ uint8_t key[16], iv[16];
+ hc128_state_t tempstate;
+ int s;
+
+ int have_initial = rnd_initial_entropy;
+
+ cprng_strong(kern_cprng, key, sizeof(key), FASYNC);
+ cprng_strong(kern_cprng, iv, sizeof(iv), FASYNC);
+
+ /* Rekey the hc128 state - expensive, don't do this at splhigh. */
+ hc128_init(&tempstate, key, iv); /* key the scratch copy, not the live state */
+ explicit_memset(key, 0, sizeof(key));
+ explicit_memset(iv, 0, sizeof(iv));
+
+ s = splhigh(); /* only the copy into the live state runs at splhigh */
+ memcpy(&ctx->hc128, &tempstate, sizeof(tempstate));
+ splx(s);
+
+ explicit_memset(&tempstate, 0, sizeof(tempstate));
+
/*
- * This code used to allow sucking 8192 bytes at a time out
- * of the kernel arc4random generator. Evidently there is some
- * very old OpenBSD application code that may try to do this.
- *
- * Note that this node is documented as type "INT" -- 4 or 8
- * bytes, not 8192.
- *
- * We continue to support this abuse of the "len" pointer here
- * but only 256 bytes at a time, as, anecdotally, the actual
- * application use here was to generate RC4 keys in userspace.
- *
- * Support for such large requests will probably be removed
- * entirely in the future.
+ * Reset for next reseed cycle.
*/
- if (*oldlenp > 256)
- return E2BIG;
+ ctx->nextreseed = time_uptime +
+ (have_initial ? CPRNGF_RESEED_SECONDS : 0);
+ ctx->numbytes = 0;
+}
+
+static void
+cprng_fast_init_ctx(void *v,
+ void *arg __unused,
+ struct cpu_info * ci __unused)
+{
+ cprng_fast_ctx_t *ctx = v;
+ cprng_fast_randrekey(ctx);
+}
+
+static void
+cprng_fast_rekey_one(void *arg __unused)
+{
+ cprng_fast_ctx_t *ctx = percpu_getref(percpu_cprng_fast_ctx);
+
+ cprng_fast_randrekey(ctx);
+ percpu_putref(percpu_cprng_fast_ctx);
+}
+
+void
+cprng_fast_init(void)
+{
+ percpu_cprng_fast_ctx = percpu_alloc(sizeof(cprng_fast_ctx_t));
+ percpu_foreach(percpu_cprng_fast_ctx, cprng_fast_init_ctx, NULL);
+ cprng_fast_initialized++;
+ cprng_fast_rekey_softintr = softint_establish(SOFTINT_CLOCK|SOFTINT_MPSAFE,
+ cprng_fast_rekey_one, NULL);
+}
+
+size_t
+_cprng_fast_exact(void *p, size_t len)
+{
+ uint32_t *pi = p, *iter;
+ int s;
+ size_t ilen = len / sizeof(*pi);
+ cprng_fast_ctx_t *ctx = percpu_getref(percpu_cprng_fast_ctx);
+
+ KASSERT(cprng_fast_initialized);
+ KASSERT(0 == ((uintptr_t)p % sizeof(uint32_t)));
+ KASSERT(ilen * sizeof(*pi) == len);
+
+ _cprng_fast_checkrekey(ctx);
+
+ s = splhigh();
+ for (iter = pi; iter < pi + ilen; iter++) {
+ hc128_extract(&ctx->hc128, (uint8_t *)iter);
+ }
+ splx(s);
+
+ ctx->numbytes += len;
+ percpu_putref(percpu_cprng_fast_ctx);
+ return len;
+}
+
+size_t
+_cprng_fast_inexact(void *p, size_t len)
+{
+ uint8_t *pc = p;
+ uint32_t *pi, tmp, *iter;
+ int s;
+ size_t initial_len, aligned_len, final_len, main_len;
+ cprng_fast_ctx_t *ctx = percpu_getref(percpu_cprng_fast_ctx);
+
+ KASSERT(cprng_fast_initialized);
+
+ /* Head: bytes up to the next 4-byte boundary (0 if aligned, <= len). */
+ initial_len = (uintptr_t)pc % sizeof(uint32_t);
+ initial_len = initial_len ? sizeof(uint32_t) - initial_len : 0;
+ initial_len = (initial_len > len) ? len : initial_len;
+ aligned_len = len - initial_len;
+ final_len = aligned_len % sizeof(uint32_t); /* tail bytes */
+ main_len = (aligned_len - final_len) / sizeof(uint32_t); /* whole words */
+ _cprng_fast_checkrekey(ctx);
+
+ s = splhigh();
+ if (initial_len) {
+ hc128_extract(&ctx->hc128, (uint8_t *)&tmp);
+ memcpy(pc, &tmp, initial_len);
+ }
+ pi = (uint32_t *)(pc + initial_len); /* first aligned word after the head */
+
+ for (iter = pi; iter < pi + main_len ; iter++) {
+ hc128_extract(&ctx->hc128, (uint8_t *)iter);
+ }
+
+ if (final_len) {
+ hc128_extract(&ctx->hc128, (uint8_t *)&tmp);
+ memcpy(pi + main_len, &tmp, final_len);
+ }
+ splx(s);
- v = kmem_alloc(*oldlenp, KM_SLEEP);
- cprng_fast(v, *oldlenp);
- node.sysctl_data = v;
- node.sysctl_size = *oldlenp;
- error = sysctl_lookup(SYSCTLFN_CALL(&node));
- kmem_free(v, *oldlenp);
- return error;
+ ctx->numbytes += len;
+ percpu_putref(percpu_cprng_fast_ctx);
+ return len;
}
Index: lib/libkern/Makefile.libkern
===================================================================
RCS file: /cvsroot/src/sys/lib/libkern/Makefile.libkern,v
retrieving revision 1.32.2.1
diff -u -p -r1.32.2.1 Makefile.libkern
--- lib/libkern/Makefile.libkern 7 Apr 2014 01:10:55 -0000 1.32.2.1
+++ lib/libkern/Makefile.libkern 17 Apr 2014 03:17:19 -0000
@@ -54,7 +54,7 @@ SRCS+= cpuset.c inet_addr.c intoa.c
SRCS+= bswap64.c
.endif
SRCS+= md4c.c md5c.c rmd160.c sha1.c sha2.c murmurhash.c
-SRCS+= pmatch.c arc4random.c bcd.c mcount.c mertwist.c crc32.c
+SRCS+= pmatch.c bcd.c mcount.c mertwist.c crc32.c
SRCS+= ppath_kmem_alloc.c
Index: lib/libkern/arc4random.c
===================================================================
RCS file: lib/libkern/arc4random.c
diff -N lib/libkern/arc4random.c
--- lib/libkern/arc4random.c 24 Jun 2013 04:21:20 -0000 1.35
+++ /dev/null 1 Jan 1970 00:00:00 -0000
@@ -1,277 +0,0 @@
-/* $NetBSD: arc4random.c,v 1.35 2013/06/24 04:21:20 riastradh Exp $ */
-
-/*-
- * Copyright (c) 2002, 2011 The NetBSD Foundation, Inc.
- * All rights reserved.
- *
- * This code is derived from software contributed to The NetBSD Foundation
- * by Thor Lancelot Simon.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
- * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/*-
- * THE BEER-WARE LICENSE
- *
- * <dan%FreeBSD.ORG@localhost> wrote this file. As long as you retain this notice you
- * can do whatever you want with this stuff. If we meet some day, and you
- * think this stuff is worth it, you can buy me a beer in return.
- *
- * Dan Moschuk
- *
- * $FreeBSD: src/sys/libkern/arc4random.c,v 1.9 2001/08/30 12:30:58 bde Exp $
- */
-
-#include <sys/cdefs.h>
-
-#include <sys/param.h>
-#include <sys/types.h>
-#include <sys/rngtest.h>
-#include <sys/systm.h>
-#include <sys/time.h>
-
-#ifdef _STANDALONE
-/*
- * XXX This is a load of bollocks. Standalone has no entropy source.
- * This module should be removed from libkern once we confirm nobody is
- * using it.
- */
-#define time_uptime 1
-typedef struct kmutex *kmutex_t;
-#define MUTEX_DEFAULT 0
-#define IPL_VM 0
-static void mutex_init(kmutex_t *m, int t, int i) {}
-static void mutex_spin_enter(kmutex_t *m) {}
-static void mutex_spin_exit(kmutex_t *m) {}
-typedef void rndsink_callback_t(void *, const void *, size_t);
-struct rndsink;
-static struct rndsink *rndsink_create(size_t n, rndsink_callback_t c, void *a)
- { return NULL; }
-static bool rndsink_request(struct rndsink *s, void *b, size_t n)
- { return true; }
-#else /* !_STANDALONE */
-#include <sys/kernel.h>
-#include <sys/mutex.h>
-#include <sys/rndsink.h>
-#endif /* _STANDALONE */
-
-#include <lib/libkern/libkern.h>
-
-/*
- * The best known attack that distinguishes RC4 output from a random
- * bitstream requires 2^25 bytes. (see Paul and Preneel, Analysis of
- * Non-fortuitous Predictive States of the RC4 Keystream Generator.
- * INDOCRYPT 2003, pp52 – 67).
- *
- * However, we discard the first 1024 bytes of output, avoiding the
- * biases detected in this paper. The best current attack that
- * can distinguish this "RC4[drop]" output seems to be Fleuhrer &
- * McGrew's attack which requires 2^30.6 bytes of output:
- * Fluhrer and McGrew, Statistical Analysis of the Alleged RC4
- * Keystream Generator. FSE 2000, pp19 – 30
- *
- * We begin trying to rekey at 2^24 bytes, and forcibly rekey at 2^29 bytes
- * even if the resulting key cannot be guaranteed to have full entropy.
- */
-#define ARC4_MAXBYTES (16 * 1024 * 1024)
-#define ARC4_HARDMAX (512 * 1024 * 1024)
-#define ARC4_RESEED_SECONDS 300
-#define ARC4_KEYBYTES 16 /* 128 bit key */
-
-static kmutex_t arc4_mtx;
-static struct rndsink *arc4_rndsink;
-
-static u_int8_t arc4_i, arc4_j;
-static int arc4_initialized = 0;
-static int arc4_numbytes = 0;
-static u_int8_t arc4_sbox[256];
-static time_t arc4_nextreseed;
-
-static rndsink_callback_t arc4_rndsink_callback;
-static void arc4_randrekey(void);
-static void arc4_randrekey_from(const uint8_t[ARC4_KEYBYTES], bool);
-static void arc4_init(void);
-static inline u_int8_t arc4_randbyte(void);
-static inline void arc4randbytes_unlocked(void *, size_t);
-void _arc4randbytes(void *, size_t);
-uint32_t _arc4random(void);
-
-static inline void
-arc4_swap(u_int8_t *a, u_int8_t *b)
-{
- u_int8_t c;
-
- c = *a;
- *a = *b;
- *b = c;
-}
-
-static void
-arc4_rndsink_callback(void *context __unused, const void *seed, size_t bytes)
-{
-
- KASSERT(bytes == ARC4_KEYBYTES);
- arc4_randrekey_from(seed, true);
-}
-
-/*
- * Stir our S-box with whatever we can get from the system entropy pool
- * now.
- */
-static void
-arc4_randrekey(void)
-{
- uint8_t seed[ARC4_KEYBYTES];
-
- const bool full_entropy = rndsink_request(arc4_rndsink, seed,
- sizeof(seed));
- arc4_randrekey_from(seed, full_entropy);
- explicit_memset(seed, 0, sizeof(seed));
-}
-
-/*
- * Stir our S-box with what's in seed.
- */
-static void
-arc4_randrekey_from(const uint8_t seed[ARC4_KEYBYTES], bool full_entropy)
-{
- uint8_t key[256];
- size_t n;
-
- mutex_spin_enter(&arc4_mtx);
-
- (void)memcpy(key, seed, ARC4_KEYBYTES);
-
- /* Rekey the arc4 state. */
- for (n = ARC4_KEYBYTES; n < sizeof(key); n++)
- key[n] = key[n % ARC4_KEYBYTES];
-
- for (n = 0; n < 256; n++) {
- arc4_j = (arc4_j + arc4_sbox[n] + key[n]) % 256;
- arc4_swap(&arc4_sbox[n], &arc4_sbox[arc4_j]);
- }
- arc4_i = arc4_j;
-
- explicit_memset(key, 0, sizeof(key));
-
- /*
- * Throw away the first N words of output, as suggested in the
- * paper "Weaknesses in the Key Scheduling Algorithm of RC4" by
- * Fluher, Mantin, and Shamir. (N = 256 in our case.)
- */
- for (n = 0; n < 256 * 4; n++)
- arc4_randbyte();
-
- /*
- * Reset for next reseed cycle. If we don't have full entropy,
- * caller has scheduled a reseed already.
- */
- arc4_nextreseed = time_uptime +
- (full_entropy? ARC4_RESEED_SECONDS : 0);
- arc4_numbytes = 0;
-
-#if 0 /* XXX */
- arc4_rngtest();
-#endif
-
- mutex_spin_exit(&arc4_mtx);
-}
-
-/*
- * Initialize our S-box to its beginning defaults.
- */
-static void
-arc4_init(void)
-{
- int n;
-
- mutex_init(&arc4_mtx, MUTEX_DEFAULT, IPL_VM);
- arc4_rndsink = rndsink_create(ARC4_KEYBYTES, &arc4_rndsink_callback,
- NULL);
-
- arc4_i = arc4_j = 0;
- for (n = 0; n < 256; n++)
- arc4_sbox[n] = (u_int8_t) n;
-
- arc4_randrekey();
- arc4_initialized = 1;
-}
-
-/*
- * Generate a random byte.
- */
-static inline u_int8_t
-arc4_randbyte(void)
-{
- u_int8_t arc4_t;
-
- arc4_i = (arc4_i + 1) % 256;
- arc4_j = (arc4_j + arc4_sbox[arc4_i]) % 256;
-
- arc4_swap(&arc4_sbox[arc4_i], &arc4_sbox[arc4_j]);
-
- arc4_t = (arc4_sbox[arc4_i] + arc4_sbox[arc4_j]) % 256;
- return arc4_sbox[arc4_t];
-}
-
-static inline void
-arc4randbytes_unlocked(void *p, size_t len)
-{
- u_int8_t *buf = (u_int8_t *)p;
- size_t i;
-
- for (i = 0; i < len; buf[i] = arc4_randbyte(), i++)
- continue;
-}
-
-void
-_arc4randbytes(void *p, size_t len)
-{
- /* Initialize array if needed. */
- if (!arc4_initialized) {
- arc4_init();
- /* avoid conditionalizing locking */
- arc4randbytes_unlocked(p, len);
- arc4_numbytes += len;
- return;
- }
- mutex_spin_enter(&arc4_mtx);
- arc4randbytes_unlocked(p, len);
- arc4_numbytes += len;
- mutex_spin_exit(&arc4_mtx);
- if ((arc4_numbytes > ARC4_MAXBYTES) ||
- (time_uptime > arc4_nextreseed)) {
- arc4_randrekey();
- }
-}
-
-u_int32_t
-_arc4random(void)
-{
- u_int32_t ret;
- u_int8_t *retc;
-
- retc = (u_int8_t *)&ret;
-
- _arc4randbytes(retc, sizeof(u_int32_t));
- return ret;
-}
Index: nfs/nfs_subs.c
===================================================================
RCS file: /cvsroot/src/sys/nfs/nfs_subs.c,v
retrieving revision 1.225
diff -u -p -r1.225 nfs_subs.c
--- nfs/nfs_subs.c 17 Mar 2014 09:35:24 -0000 1.225
+++ nfs/nfs_subs.c 17 Apr 2014 03:17:19 -0000
@@ -1489,7 +1489,6 @@ nfs_init0(void)
nfs_ticks = (hz * NFS_TICKINTVL + 500) / 1000;
if (nfs_ticks < 1)
nfs_ticks = 1;
- nfs_xid = cprng_fast32();
nfsdreq_init();
/*
@@ -1994,6 +1993,10 @@ nfs_getxid(void)
{
u_int32_t newxid;
+ if (__predict_false(nfs_xid == 0)) {
+ nfs_xid = cprng_fast32();
+ }
+
/* get next xid. skip 0 */
do {
newxid = atomic_inc_32_nv(&nfs_xid);
Index: sys/cprng.h
===================================================================
RCS file: /cvsroot/src/sys/sys/cprng.h,v
retrieving revision 1.9
diff -u -p -r1.9 cprng.h
--- sys/cprng.h 17 Jan 2014 02:08:56 -0000 1.9
+++ sys/cprng.h 17 Apr 2014 03:17:19 -0000
@@ -41,42 +41,91 @@
#include <sys/rnd.h> /* XXX users bogusly transitively need this */
#include <crypto/nist_ctr_drbg/nist_ctr_drbg.h>
+#include <crypto/hc128/hc128.h>
+#include <sys/percpu.h>
+#include <sys/intr.h>
/*
* NIST SP800-90 says 2^19 bytes per request for the CTR_DRBG.
*/
#define CPRNG_MAX_LEN 524288
+#define CPRNGF_MAXBYTES (512 * 1024 * 1024)
+#define CPRNGF_HARDMAX (1 * 1024 * 1024 * 1024)
+#define CPRNGF_RESEED_SECONDS 600
+
+typedef struct {
+ hc128_state_t hc128;
+ int numbytes;
+ time_t nextreseed;
+} cprng_fast_ctx_t;
+
/*
- * We do not want an arc4random() prototype available to anyone.
+ * This is a macro so we can skip any conditional logic at runtime if
+ * the size provided is a multiple of the underlying stream cipher
+ * blocksize, e.g. sizeof(padded struct).
*/
-void _arc4randbytes(void *, size_t);
-uint32_t _arc4random(void);
+#define cprng_fast(p, len) ((0 == ((len) % sizeof(uint32_t))) && \
+ (0 == ((uintptr_t)(p) % sizeof(uint32_t))) ? \
+ _cprng_fast_exact((p), (len)) : \
+ _cprng_fast_inexact((p), (len)))
+
+size_t _cprng_fast_exact(void *, size_t);
+size_t _cprng_fast_inexact(void *, size_t);
-static inline size_t
-cprng_fast(void *p, size_t len)
+static inline void
+_cprng_fast_checkrekey(cprng_fast_ctx_t *ctx)
{
- _arc4randbytes(p, len);
- return len;
+ extern void *cprng_fast_rekey_softintr;
+
+ if (__predict_false((ctx->numbytes > CPRNGF_MAXBYTES) ||
+ (time_uptime > ctx->nextreseed))) {
+ /* Schedule a deferred reseed */
+ softint_schedule(cprng_fast_rekey_softintr);
+ }
}
-static inline uint32_t
-cprng_fast32(void)
+static inline uint32_t cprng_fast32(void)
{
- return _arc4random();
+ uint32_t ret;
+ extern percpu_t *percpu_cprng_fast_ctx;
+ cprng_fast_ctx_t *ctx = percpu_getref(percpu_cprng_fast_ctx);
+ int s;
+
+ _cprng_fast_checkrekey(ctx);
+
+ s = splhigh();
+ hc128_extract(&ctx->hc128, (uint8_t *)&ret);
+ splx(s);
+
+ ctx->numbytes += sizeof(uint32_t);
+ percpu_putref(percpu_cprng_fast_ctx);
+ return ret;
}
-static inline uint64_t
-cprng_fast64(void)
+static inline uint64_t cprng_fast64(void)
{
- uint64_t r;
- _arc4randbytes(&r, sizeof(r));
- return r;
+ uint64_t ret;
+ extern percpu_t *percpu_cprng_fast_ctx;
+ cprng_fast_ctx_t *ctx = percpu_getref(percpu_cprng_fast_ctx);
+ int s;
+
+ _cprng_fast_checkrekey(ctx);
+
+ s = splhigh();
+ hc128_extract(&ctx->hc128, (uint8_t *)&ret);
+ hc128_extract(&ctx->hc128, (uint8_t *)(((uint32_t *)&ret) + 1));
+ splx(s);
+
+ ctx->numbytes += sizeof(uint64_t);
+ percpu_putref(percpu_cprng_fast_ctx);
+ return ret;
}
typedef struct cprng_strong cprng_strong_t;
void cprng_init(void);
+void cprng_fast_init(void);
#define CPRNG_INIT_ANY 0x00000001
#define CPRNG_REKEY_ANY 0x00000002
Home |
Main Index |
Thread Index |
Old Index