Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src/lib/libnvmm Micro optimizations:
details: https://anonhg.NetBSD.org/src/rev/979901344224
branches: trunk
changeset: 839813:979901344224
user: maxv <maxv%NetBSD.org@localhost>
date: Thu Mar 07 15:47:34 2019 +0000
description:
Micro optimizations:
- Compress x86_rexpref, x86_regmodrm, x86_opcode and x86_instr.
- Cache-align the register, opcode and group tables.
- Give the opcode tables 256 entries so they can be indexed directly by the opcode byte, avoiding a separate lookup.
diffstat:
lib/libnvmm/libnvmm_x86.c | 314 ++++++++++++++++++---------------------------
1 file changed, 127 insertions(+), 187 deletions(-)
diffs (truncated from 786 to 300 lines):
diff -r 0fca0b04c4b2 -r 979901344224 lib/libnvmm/libnvmm_x86.c
--- a/lib/libnvmm/libnvmm_x86.c Thu Mar 07 15:22:21 2019 +0000
+++ b/lib/libnvmm/libnvmm_x86.c Thu Mar 07 15:47:34 2019 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: libnvmm_x86.c,v 1.26 2019/02/26 12:23:12 maxv Exp $ */
+/* $NetBSD: libnvmm_x86.c,v 1.27 2019/03/07 15:47:34 maxv Exp $ */
/*
* Copyright (c) 2018 The NetBSD Foundation, Inc.
@@ -46,6 +46,7 @@
#include "nvmm.h"
#define MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
+#define __cacheline_aligned __attribute__((__aligned__(64)))
#include <x86/specialreg.h>
@@ -904,15 +905,15 @@
bool adr_ovr:1;
bool rep:1;
bool repn:1;
- int seg;
+ int8_t seg;
};
struct x86_rexpref {
- bool present;
- bool w;
- bool r;
- bool x;
- bool b;
+ bool b:1;
+ bool x:1;
+ bool r:1;
+ bool w:1;
+ bool present:1;
};
struct x86_reg {
@@ -962,10 +963,9 @@
};
struct x86_regmodrm {
- bool present;
- enum REGMODRM__Mod mod;
- enum REGMODRM__Reg reg;
- enum REGMODRM__Rm rm;
+ uint8_t mod:2;
+ uint8_t reg:3;
+ uint8_t rm:3;
};
struct x86_immediate {
@@ -999,22 +999,20 @@
};
struct x86_instr {
- size_t len;
+ uint8_t len;
struct x86_legpref legpref;
struct x86_rexpref rexpref;
- size_t operand_size;
- size_t address_size;
+ struct x86_regmodrm regmodrm;
+ uint8_t operand_size;
+ uint8_t address_size;
uint64_t zeroextend_mask;
- struct x86_regmodrm regmodrm;
-
const struct x86_opcode *opcode;
+ const struct x86_emul *emul;
struct x86_store src;
struct x86_store dst;
struct x86_store *strm;
-
- const struct x86_emul *emul;
};
struct x86_decode_fsm {
@@ -1030,22 +1028,21 @@
};
struct x86_opcode {
- uint8_t byte;
- bool regmodrm;
- bool regtorm;
- bool dmo;
- bool todmo;
- bool movs;
- bool stos;
- bool lods;
- bool szoverride;
- int defsize;
- int allsize;
- bool group1;
- bool group3;
- bool group11;
- bool immediate;
- int flags;
+ bool valid:1;
+ bool regmodrm:1;
+ bool regtorm:1;
+ bool dmo:1;
+ bool todmo:1;
+ bool movs:1;
+ bool stos:1;
+ bool lods:1;
+ bool szoverride:1;
+ bool group1:1;
+ bool group3:1;
+ bool group11:1;
+ bool immediate:1;
+ uint8_t defsize;
+ uint8_t flags;
const struct x86_emul *emul;
};
@@ -1062,59 +1059,56 @@
#define FLAG_immz 0x02
#define FLAG_ze 0x04
-static const struct x86_group_entry group1[8] = {
+static const struct x86_group_entry group1[8] __cacheline_aligned = {
[1] = { .emul = &x86_emul_or },
[4] = { .emul = &x86_emul_and },
[6] = { .emul = &x86_emul_xor },
[7] = { .emul = &x86_emul_cmp }
};
-static const struct x86_group_entry group3[8] = {
+static const struct x86_group_entry group3[8] __cacheline_aligned = {
[0] = { .emul = &x86_emul_test },
[1] = { .emul = &x86_emul_test }
};
-static const struct x86_group_entry group11[8] = {
+static const struct x86_group_entry group11[8] __cacheline_aligned = {
[0] = { .emul = &x86_emul_mov }
};
-static const struct x86_opcode primary_opcode_table[] = {
+static const struct x86_opcode primary_opcode_table[256] __cacheline_aligned = {
/*
* Group1
*/
- {
+ [0x80] = {
/* Eb, Ib */
- .byte = 0x80,
+ .valid = true,
.regmodrm = true,
.regtorm = true,
.szoverride = false,
.defsize = OPSIZE_BYTE,
- .allsize = -1,
.group1 = true,
.immediate = true,
.emul = NULL /* group1 */
},
- {
+ [0x81] = {
/* Ev, Iz */
- .byte = 0x81,
+ .valid = true,
.regmodrm = true,
.regtorm = true,
.szoverride = true,
.defsize = -1,
- .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
.group1 = true,
.immediate = true,
.flags = FLAG_immz,
.emul = NULL /* group1 */
},
- {
+ [0x83] = {
/* Ev, Ib */
- .byte = 0x83,
+ .valid = true,
.regmodrm = true,
.regtorm = true,
.szoverride = true,
.defsize = -1,
- .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
.group1 = true,
.immediate = true,
.flags = FLAG_imm8,
@@ -1124,26 +1118,24 @@
/*
* Group3
*/
- {
+ [0xF6] = {
/* Eb, Ib */
- .byte = 0xF6,
+ .valid = true,
.regmodrm = true,
.regtorm = true,
.szoverride = false,
.defsize = OPSIZE_BYTE,
- .allsize = -1,
.group3 = true,
.immediate = true,
.emul = NULL /* group3 */
},
- {
+ [0xF7] = {
/* Ev, Iz */
- .byte = 0xF7,
+ .valid = true,
.regmodrm = true,
.regtorm = true,
.szoverride = true,
.defsize = -1,
- .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
.group3 = true,
.immediate = true,
.flags = FLAG_immz,
@@ -1153,26 +1145,24 @@
/*
* Group11
*/
- {
+ [0xC6] = {
/* Eb, Ib */
- .byte = 0xC6,
+ .valid = true,
.regmodrm = true,
.regtorm = true,
.szoverride = false,
.defsize = OPSIZE_BYTE,
- .allsize = -1,
.group11 = true,
.immediate = true,
.emul = NULL /* group11 */
},
- {
+ [0xC7] = {
/* Ev, Iz */
- .byte = 0xC7,
+ .valid = true,
.regmodrm = true,
.regtorm = true,
.szoverride = true,
.defsize = -1,
- .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
.group11 = true,
.immediate = true,
.flags = FLAG_immz,
@@ -1182,353 +1172,321 @@
/*
* OR
*/
- {
+ [0x08] = {
/* Eb, Gb */
- .byte = 0x08,
+ .valid = true,
.regmodrm = true,
.regtorm = true,
.szoverride = false,
.defsize = OPSIZE_BYTE,
- .allsize = -1,
.emul = &x86_emul_or
},
- {
+ [0x09] = {
/* Ev, Gv */
- .byte = 0x09,
+ .valid = true,
.regmodrm = true,
.regtorm = true,
.szoverride = true,
.defsize = -1,
- .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
.emul = &x86_emul_or
},
- {
+ [0x0A] = {
/* Gb, Eb */
- .byte = 0x0A,
+ .valid = true,
.regmodrm = true,
.regtorm = false,
.szoverride = false,
.defsize = OPSIZE_BYTE,
- .allsize = -1,
.emul = &x86_emul_or
},
- {
+ [0x0B] = {
/* Gv, Ev */
- .byte = 0x0B,
+ .valid = true,
.regmodrm = true,
.regtorm = false,
.szoverride = true,
.defsize = -1,
Home |
Main Index |
Thread Index |
Old Index