Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/sys/crypto/adiantum Simplify internal Poly1305 API in adiantum.c.



details:   https://anonhg.NetBSD.org/src/rev/65052afae95c
branches:  trunk
changeset: 1012211:65052afae95c
user:      riastradh <riastradh%NetBSD.org@localhost>
date:      Sun Jul 26 04:05:20 2020 +0000

description:
Simplify internal Poly1305 API in adiantum.c.

Should be slightly faster this way too.

diffstat:

 sys/crypto/adiantum/adiantum.c |  130 ++++++++++++++++++----------------------
 1 files changed, 59 insertions(+), 71 deletions(-)

diffs (201 lines):

diff -r 83ab93bf7f0c -r 65052afae95c sys/crypto/adiantum/adiantum.c
--- a/sys/crypto/adiantum/adiantum.c    Sun Jul 26 04:03:45 2020 +0000
+++ b/sys/crypto/adiantum/adiantum.c    Sun Jul 26 04:05:20 2020 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: adiantum.c,v 1.4 2020/07/25 23:05:40 riastradh Exp $   */
+/*     $NetBSD: adiantum.c,v 1.5 2020/07/26 04:05:20 riastradh Exp $   */
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -37,7 +37,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(1, "$NetBSD: adiantum.c,v 1.4 2020/07/25 23:05:40 riastradh Exp $");
+__KERNEL_RCSID(1, "$NetBSD: adiantum.c,v 1.5 2020/07/26 04:05:20 riastradh Exp $");
 
 #include <sys/types.h>
 #include <sys/endian.h>
@@ -207,8 +207,7 @@
 }
 
 static void
-poly1305_update_internal(struct poly1305 *P, const uint8_t m[static 16],
-    uint32_t pad)
+poly1305_update_blocks(struct poly1305 *P, const uint8_t *m, size_t mlen)
 {
        uint32_t r0 = P->r[0];
        uint32_t r1 = P->r[1];
@@ -220,41 +219,66 @@
        uint32_t h2 = P->h[2];
        uint32_t h3 = P->h[3];
        uint32_t h4 = P->h[4];
+       uint32_t m0, m1, m2, m3, m4; /* 26-bit message chunks */
        uint64_t k0, k1, k2, k3, k4; /* 64-bit extension of h */
        uint64_t p0, p1, p2, p3, p4; /* columns of product */
        uint32_t c;                  /* carry */
 
-       /* h' := h + m */
-       h0 += (le32dec(m +  0) >> 0) & 0x03ffffff;
-       h1 += (le32dec(m +  3) >> 2) & 0x03ffffff;
-       h2 += (le32dec(m +  6) >> 4) & 0x03ffffff;
-       h3 += (le32dec(m +  9) >> 6);
-       h4 += (le32dec(m + 12) >> 8) | (pad << 24);
+       while (mlen) {
+               if (__predict_false(mlen < 16)) {
+                       /* Handle padding for uneven last block.  */
+                       uint8_t buf[16];
+                       unsigned i;
 
-       /* extend to 64 bits */
-       k0 = h0;
-       k1 = h1;
-       k2 = h2;
-       k3 = h3;
-       k4 = h4;
+                       for (i = 0; i < mlen; i++)
+                               buf[i] = m[i];
+                       buf[i++] = 1;
+                       for (; i < 16; i++)
+                               buf[i] = 0;
+                       m0 = le32dec(buf +  0) >> 0;
+                       m1 = le32dec(buf +  3) >> 2;
+                       m2 = le32dec(buf +  6) >> 4;
+                       m3 = le32dec(buf +  9) >> 6;
+                       m4 = le32dec(buf + 12) >> 8;
+                       mlen = 0;
 
-       /* p := h' * r = (h + m)*r mod 2^130 - 5 */
-       p0 = r0*k0 + 5*r4*k1 + 5*r3*k2 + 5*r2*k3 + 5*r1*k4;
-       p1 = r1*k0 +   r0*k1 + 5*r4*k2 + 5*r3*k3 + 5*r2*k4;
-       p2 = r2*k0 +   r1*k1 +   r0*k2 + 5*r4*k3 + 5*r3*k4;
-       p3 = r3*k0 +   r2*k1 +   r1*k2 +   r0*k3 + 5*r4*k4;
-       p4 = r4*k0 +   r3*k1 +   r2*k2 +   r1*k3 +   r0*k4;
+                       explicit_memset(buf, 0, sizeof buf);
+               } else {
+                       m0 = le32dec(m +  0) >> 0;
+                       m1 = le32dec(m +  3) >> 2;
+                       m2 = le32dec(m +  6) >> 4;
+                       m3 = le32dec(m +  9) >> 6;
+                       m4 = le32dec(m + 12) >> 8;
+                       m4 |= 1u << 24;
+                       m += 16;
+                       mlen -= 16;
+               }
+
+               /* k := h + m, extended to 64 bits */
+               k0 = h0 + (m0 & 0x03ffffff);
+               k1 = h1 + (m1 & 0x03ffffff);
+               k2 = h2 + (m2 & 0x03ffffff);
+               k3 = h3 + m3;
+               k4 = h4 + m4;
 
-       /* propagate carries */
-       p0 += 0; c = p0 >> 26; h0 = p0 & 0x03ffffff;
-       p1 += c; c = p1 >> 26; h1 = p1 & 0x03ffffff;
-       p2 += c; c = p2 >> 26; h2 = p2 & 0x03ffffff;
-       p3 += c; c = p3 >> 26; h3 = p3 & 0x03ffffff;
-       p4 += c; c = p4 >> 26; h4 = p4 & 0x03ffffff;
+               /* p := k * r = (h + m)*r mod 2^130 - 5 */
+               p0 = r0*k0 + 5*r4*k1 + 5*r3*k2 + 5*r2*k3 + 5*r1*k4;
+               p1 = r1*k0 +   r0*k1 + 5*r4*k2 + 5*r3*k3 + 5*r2*k4;
+               p2 = r2*k0 +   r1*k1 +   r0*k2 + 5*r4*k3 + 5*r3*k4;
+               p3 = r3*k0 +   r2*k1 +   r1*k2 +   r0*k3 + 5*r4*k4;
+               p4 = r4*k0 +   r3*k1 +   r2*k2 +   r1*k3 +   r0*k4;
 
-       /* reduce 2^130 = 5 */
-       h0 += c*5; c = h0 >> 26; h0 &= 0x03ffffff;
-       h1 += c;
+               /* propagate carries and update h */
+               p0 += 0; c = p0 >> 26; h0 = p0 & 0x03ffffff;
+               p1 += c; c = p1 >> 26; h1 = p1 & 0x03ffffff;
+               p2 += c; c = p2 >> 26; h2 = p2 & 0x03ffffff;
+               p3 += c; c = p3 >> 26; h3 = p3 & 0x03ffffff;
+               p4 += c; c = p4 >> 26; h4 = p4 & 0x03ffffff;
+
+               /* reduce 2^130 = 5 */
+               h0 += c*5; c = h0 >> 26; h0 &= 0x03ffffff;
+               h1 += c;
+       }
 
        /* update hash values */
        P->h[0] = h0;
@@ -265,32 +289,6 @@
 }
 
 static void
-poly1305_update_block(struct poly1305 *P, const uint8_t m[static 16])
-{
-
-       poly1305_update_internal(P, m, 1);
-}
-
-static void
-poly1305_update_last(struct poly1305 *P, const uint8_t *m, size_t mlen)
-{
-       uint8_t buf[16];
-       unsigned i;
-
-       if (mlen == 16) {
-               poly1305_update_internal(P, m, 1);
-               return;
-       }
-
-       for (i = 0; i < mlen; i++)
-               buf[i] = m[i];
-       buf[i++] = 1;
-       for (; i < 16; i++)
-               buf[i] = 0;
-       poly1305_update_internal(P, buf, 0);
-}
-
-static void
 poly1305_final(uint8_t h[static 16], struct poly1305 *P)
 {
        uint32_t h0 = P->h[0];
@@ -345,9 +343,7 @@
        struct poly1305 P;
 
        poly1305_init(&P, k);
-       for (; mlen > 16; mlen -= 16, m += 16)
-               poly1305_update_block(&P, m);
-       poly1305_update_last(&P, m, mlen);
+       poly1305_update_blocks(&P, m, mlen);
        poly1305_final(h, &P);
 }
 
@@ -464,8 +460,7 @@
        poly1305_init(&P, pk);
        for (; mlen; m += MIN(mlen, 1024), mlen -= MIN(mlen, 1024)) {
                nh(h0, m, MIN(mlen, 1024), nhk);
-               poly1305_update_block(&P, h0 + 16*0);
-               poly1305_update_block(&P, h0 + 16*1);
+               poly1305_update_blocks(&P, h0, 32);
        }
        poly1305_final(h, &P);
 }
@@ -1834,7 +1829,6 @@
     const uint8_t kl[static 16],
     const uint32_t kn[static 268])
 {
-       const uint8_t *t8 = t;
        struct poly1305 P;
        uint8_t llenbuf[16];
        uint8_t ht[16];
@@ -1847,14 +1841,8 @@
 
        /* Compute H_T := Poly1305_{K_T}(le128(|l|) || tweak).  */
        poly1305_init(&P, kt);
-       if (tlen == 0) {
-               poly1305_update_last(&P, llenbuf, 16);
-       } else {
-               poly1305_update_block(&P, llenbuf);
-               for (; tlen > 16; t8 += 16, tlen -= 16)
-                       poly1305_update_block(&P, t8);
-               poly1305_update_last(&P, t8, tlen);
-       }
+       poly1305_update_blocks(&P, llenbuf, 16);
+       poly1305_update_blocks(&P, t, tlen);
        poly1305_final(ht, &P);
 
        /* Compute H_L := Poly1305_{K_L}(NH(pad_128(l))).  */



Home | Main Index | Thread Index | Old Index