Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/sys Support large pages on KASLR kernels, in a way that does...



details:   https://anonhg.NetBSD.org/src/rev/e8b7d3eec0d7
branches:  trunk
changeset: 357571:e8b7d3eec0d7
user:      maxv <maxv%NetBSD.org@localhost>
date:      Wed Nov 15 18:02:36 2017 +0000

description:
Support large pages on KASLR kernels, in a way that does not reduce
randomness but, on the contrary, increases it.

The size of the kernel sub-blocks is changed to be 1MB. This produces a
kernel with sections that are always < 2MB in size, that can fit a large
page.

Each section is put in a 2MB physical chunk. In this chunk, there is a
padding of approximately 1MB. The prekern uses a random offset aligned to
sh_addralign, to shift the section in physical memory.

For example, physical memory layout created by the bootloader for .text.4
and .rodata.0:
 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~+
 |+---------------+                  |+---------------+                  |
 ||    .text.4    |       PAD        ||   .rodata.0   |       PAD        |
 |+---------------+                  |+---------------+                  |
 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~+
 PA                                  PA+2MB                         PA+4MB

Then, physical memory layout, after having been shifted by the prekern:
 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~+
 | P +---------------+               |          +---------------+        |
 | A |    .text.4    |      PAD      |   PAD    |   .rodata.0   |   PAD  |
 | D +---------------+               |          +---------------+        |
 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~+
 PA                                  PA+2MB                         PA+4MB

The kernel maps these 2MB physical chunks with 2MB large pages. Therefore,
randomness is enforced at both the virtual and physical levels, and the
resulting entropy is higher than that of the implementation we had until
now.

The padding around the section is filled by the prekern. To avoid consuming
too much memory, the sections that are smaller than PAGE_SIZE are mapped
with normal pages - because there is no point in optimizing them. In these
normal pages, the same shift is applied.

This change has two additional advantages: (a) the cache attacks based on
the TLB are mostly mitigated, because even if you are able to determine
that a given page-aligned range is mapped as executable you don't know
where exactly within that range the section actually begins, and (b) given
that we are slightly randomizing the physical layout we are making some
rare physical attacks more difficult to conduct.

NOTE: after this change you need to update GENERIC_KASLR / prekern /
bootloader.

diffstat:

 sys/arch/amd64/conf/Makefile.amd64     |   4 +-
 sys/arch/amd64/stand/prekern/elf.c     |   7 ++-
 sys/arch/amd64/stand/prekern/mm.c      |  58 +++++++++++++++++++++++++++------
 sys/arch/amd64/stand/prekern/prekern.h |   4 +-
 sys/arch/x86/x86/pmap.c                |  10 +----
 sys/lib/libsa/loadfile_elf32.c         |  26 +++++++++-----
 6 files changed, 73 insertions(+), 36 deletions(-)

diffs (298 lines):

diff -r a0131c65bfe1 -r e8b7d3eec0d7 sys/arch/amd64/conf/Makefile.amd64
--- a/sys/arch/amd64/conf/Makefile.amd64        Wed Nov 15 16:37:00 2017 +0000
+++ b/sys/arch/amd64/conf/Makefile.amd64        Wed Nov 15 18:02:36 2017 +0000
@@ -1,4 +1,4 @@
-#      $NetBSD: Makefile.amd64,v 1.63 2017/11/14 10:15:40 maxv Exp $
+#      $NetBSD: Makefile.amd64,v 1.64 2017/11/15 18:02:37 maxv Exp $
 
 # Makefile for NetBSD
 #
@@ -62,7 +62,7 @@
 ##
 TEXTADDR?=     0xffffffff80200000
 .if defined(KASLR)
-EXTRA_LINKFLAGS=       --split-by-file=0x200000 -r -d
+EXTRA_LINKFLAGS=       --split-by-file=0x100000 -r -d
 KERNLDSCRIPT?= ${AMD64}/conf/kern.ldscript.kaslr
 .else
 EXTRA_LINKFLAGS=       -z max-page-size=0x200000
diff -r a0131c65bfe1 -r e8b7d3eec0d7 sys/arch/amd64/stand/prekern/elf.c
--- a/sys/arch/amd64/stand/prekern/elf.c        Wed Nov 15 16:37:00 2017 +0000
+++ b/sys/arch/amd64/stand/prekern/elf.c        Wed Nov 15 18:02:36 2017 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: elf.c,v 1.13 2017/11/14 07:06:34 maxv Exp $    */
+/*     $NetBSD: elf.c,v 1.14 2017/11/15 18:02:36 maxv Exp $    */
 
 /*
  * Copyright (c) 2017 The NetBSD Foundation, Inc. All rights reserved.
@@ -267,7 +267,7 @@
        int segtype;
        vaddr_t secva;
        paddr_t secpa;
-       size_t i, secsz;
+       size_t i, secsz, secalign;
 
        for (i = 0; i < eif.ehdr->e_shnum; i++) {
                shdr = &eif.shdr[i];
@@ -289,10 +289,11 @@
                }
                secpa = basepa + shdr->sh_offset;
                secsz = shdr->sh_size;
+               secalign = shdr->sh_addralign;
                ASSERT(shdr->sh_offset != 0);
                ASSERT(secpa % PAGE_SIZE == 0);
 
-               secva = mm_map_segment(segtype, secpa, secsz);
+               secva = mm_map_segment(segtype, secpa, secsz, secalign);
 
                /* We want (headva + sh_offset) to be the VA of the section. */
                ASSERT(secva > headva);
diff -r a0131c65bfe1 -r e8b7d3eec0d7 sys/arch/amd64/stand/prekern/mm.c
--- a/sys/arch/amd64/stand/prekern/mm.c Wed Nov 15 16:37:00 2017 +0000
+++ b/sys/arch/amd64/stand/prekern/mm.c Wed Nov 15 18:02:36 2017 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: mm.c,v 1.13 2017/11/14 07:06:34 maxv Exp $     */
+/*     $NetBSD: mm.c,v 1.14 2017/11/15 18:02:36 maxv Exp $     */
 
 /*
  * Copyright (c) 2017 The NetBSD Foundation, Inc. All rights reserved.
@@ -34,6 +34,8 @@
 #define PAD_RODATA     0x00
 #define PAD_DATA       0x00
 
+#define ELFROUND       64
+
 static const pt_entry_t protection_codes[3] = {
        [MM_PROT_READ] = PG_RO | PG_NX,
        [MM_PROT_WRITE] = PG_RW | PG_NX,
@@ -236,7 +238,7 @@
 }
 
 static vaddr_t
-mm_randva_kregion(size_t size)
+mm_randva_kregion(size_t size, size_t align)
 {
        vaddr_t sva, eva;
        vaddr_t randva;
@@ -247,7 +249,7 @@
        while (1) {
                rnd = mm_rand_num64();
                randva = rounddown(KASLR_WINDOW_BASE +
-                   rnd % (KASLR_WINDOW_SIZE - size), PAGE_SIZE);
+                   rnd % (KASLR_WINDOW_SIZE - size), align);
 
                /* Detect collisions */
                ok = true;
@@ -313,22 +315,55 @@
        fatal("bootspace_addseg: segments full");
 }
 
+static size_t
+mm_shift_segment(vaddr_t va, size_t pagesz, size_t elfsz, size_t elfalign)
+{
+       size_t shiftsize, offset;
+       uint64_t rnd;
+
+       if (elfalign == 0) {
+               elfalign = ELFROUND;
+       }
+
+       shiftsize = roundup(elfsz, pagesz) - roundup(elfsz, elfalign);
+       if (shiftsize == 0) {
+               return 0;
+       }
+
+       rnd = mm_rand_num64();
+       offset = roundup(rnd % shiftsize, elfalign);
+       ASSERT((va + offset) % elfalign == 0);
+
+       memmove((void *)(va + offset), (void *)va, elfsz);
+
+       return offset;
+}
+
 vaddr_t
-mm_map_segment(int segtype, paddr_t pa, size_t elfsz)
+mm_map_segment(int segtype, paddr_t pa, size_t elfsz, size_t elfalign)
 {
-       size_t i, npages, size;
+       size_t i, npages, size, pagesz, offset;
        vaddr_t randva;
        char pad;
 
-       size = roundup(elfsz, PAGE_SIZE);
-       randva = mm_randva_kregion(size);
+       if (elfsz < PAGE_SIZE) {
+               pagesz = NBPD_L1;
+       } else {
+               pagesz = NBPD_L2;
+       }
+
+       size = roundup(elfsz, pagesz);
+       randva = mm_randva_kregion(size, pagesz);
+
        npages = size / PAGE_SIZE;
-
        for (i = 0; i < npages; i++) {
                mm_enter_pa(pa + i * PAGE_SIZE,
                    randva + i * PAGE_SIZE, MM_PROT_READ|MM_PROT_WRITE);
        }
 
+       offset = mm_shift_segment(randva, pagesz, elfsz, elfalign);
+       ASSERT(offset + elfsz <= size);
+
        if (segtype == BTSEG_TEXT) {
                pad = PAD_TEXT;
        } else if (segtype == BTSEG_RODATA) {
@@ -336,11 +371,12 @@
        } else {
                pad = PAD_DATA;
        }
-       memset((void *)(randva + elfsz), pad, size - elfsz);
+       memset((void *)randva, pad, offset);
+       memset((void *)(randva + offset + elfsz), pad, size - elfsz - offset);
 
        bootspace_addseg(segtype, randva, pa, size);
 
-       return randva;
+       return (randva + offset);
 }
 
 static void
@@ -357,7 +393,7 @@
 
        /* Create the page tree */
        size = (NKL2_KIMG_ENTRIES + 1) * NBPD_L2;
-       randva = mm_randva_kregion(size);
+       randva = mm_randva_kregion(size, PAGE_SIZE);
 
        /* Enter the area and build the ELF info */
        bootpa = bootspace_getend();
diff -r a0131c65bfe1 -r e8b7d3eec0d7 sys/arch/amd64/stand/prekern/prekern.h
--- a/sys/arch/amd64/stand/prekern/prekern.h    Wed Nov 15 16:37:00 2017 +0000
+++ b/sys/arch/amd64/stand/prekern/prekern.h    Wed Nov 15 18:02:36 2017 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: prekern.h,v 1.12 2017/11/14 07:06:34 maxv Exp $        */
+/*     $NetBSD: prekern.h,v 1.13 2017/11/15 18:02:36 maxv Exp $        */
 
 /*
  * Copyright (c) 2017 The NetBSD Foundation, Inc. All rights reserved.
@@ -111,7 +111,7 @@
 void mm_init(paddr_t);
 paddr_t mm_vatopa(vaddr_t);
 void mm_bootspace_mprotect(void);
-vaddr_t mm_map_segment(int, paddr_t, size_t);
+vaddr_t mm_map_segment(int, paddr_t, size_t, size_t);
 void mm_map_kernel(void);
 
 /* prekern.c */
diff -r a0131c65bfe1 -r e8b7d3eec0d7 sys/arch/x86/x86/pmap.c
--- a/sys/arch/x86/x86/pmap.c   Wed Nov 15 16:37:00 2017 +0000
+++ b/sys/arch/x86/x86/pmap.c   Wed Nov 15 18:02:36 2017 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: pmap.c,v 1.264 2017/11/11 12:51:05 maxv Exp $  */
+/*     $NetBSD: pmap.c,v 1.265 2017/11/15 18:02:37 maxv Exp $  */
 
 /*
  * Copyright (c) 2008, 2010, 2016, 2017 The NetBSD Foundation, Inc.
@@ -170,13 +170,12 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.264 2017/11/11 12:51:05 maxv Exp $");
+__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.265 2017/11/15 18:02:37 maxv Exp $");
 
 #include "opt_user_ldt.h"
 #include "opt_lockdebug.h"
 #include "opt_multiprocessor.h"
 #include "opt_xen.h"
-#include "opt_kaslr.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -1603,11 +1602,6 @@
        paddr_t pa;
        size_t i;
 
-#ifdef KASLR
-       /* XXX no large pages yet, soon */
-       return;
-#endif
-
        /* Remap the kernel text using large pages. */
        for (i = 0; i < BTSPACE_NSEGS; i++) {
                if (bootspace.segs[i].type != BTSEG_TEXT) {
diff -r a0131c65bfe1 -r e8b7d3eec0d7 sys/lib/libsa/loadfile_elf32.c
--- a/sys/lib/libsa/loadfile_elf32.c    Wed Nov 15 16:37:00 2017 +0000
+++ b/sys/lib/libsa/loadfile_elf32.c    Wed Nov 15 18:02:36 2017 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: loadfile_elf32.c,v 1.50 2017/11/13 21:32:21 maxv Exp $ */
+/* $NetBSD: loadfile_elf32.c,v 1.51 2017/11/15 18:02:37 maxv Exp $ */
 
 /*
  * Copyright (c) 1997, 2008, 2017 The NetBSD Foundation, Inc.
@@ -265,7 +265,8 @@
 
 /* -------------------------------------------------------------------------- */
 
-#define KERNALIGN 4096 /* XXX should depend on marks[] */
+#define KERNALIGN_SMALL (1 << 12)      /* XXX should depend on marks[] */
+#define KERNALIGN_LARGE (1 << 21)      /* XXX should depend on marks[] */
 
 /*
  * Read some data from a file, and put it in the bootloader memory (local).
@@ -343,7 +344,7 @@
        Elf_Shdr *shdr;
        Elf_Addr shpp, addr;
        int i, j, loaded;
-       size_t size, shdrsz;
+       size_t size, shdrsz, align;
        Elf_Addr maxp, elfp = 0;
        int ret;
 
@@ -385,14 +386,18 @@
        /*
         * Load the KERNEL SECTIONS.
         */
-       maxp = roundup(maxp, KERNALIGN);
+       maxp = roundup(maxp, KERNALIGN_SMALL);
        for (i = 0; i < elf->e_shnum; i++) {
-               addr = maxp;
-               size = (size_t)shdr[i].sh_size;
-
                if (!(shdr[i].sh_flags & SHF_ALLOC)) {
                        continue;
                }
+               size = (size_t)shdr[i].sh_size;
+               if (size <= KERNALIGN_SMALL) {
+                       align = KERNALIGN_SMALL;
+               } else {
+                       align = KERNALIGN_LARGE;
+               }
+               addr = roundup(maxp, align);
 
                loaded = 0;
                switch (shdr[i].sh_type) {
@@ -415,10 +420,11 @@
                }
 
                if (loaded) {
-                       shdr[i].sh_offset = maxp - elfp;
-                       maxp = roundup(maxp + size, KERNALIGN);
+                       shdr[i].sh_offset = addr - elfp;
+                       maxp = roundup(addr + size, align);
                }
        }
+       maxp = roundup(maxp, KERNALIGN_LARGE);
 
        /*
         * Load the SYM+REL SECTIONS.
@@ -456,7 +462,7 @@
                        break;
                }
        }
-       maxp = roundup(maxp, KERNALIGN);
+       maxp = roundup(maxp, KERNALIGN_SMALL);
 
        /*
         * Finally, load the SECTION HEADERS.



Home | Main Index | Thread Index | Old Index