[src/trunk]: src/usr.bin/xlint/lint1 lint: clean up the lexer

To: source-changes-hg%NetBSD.org@localhost
Subject: [src/trunk]: src/usr.bin/xlint/lint1 lint: clean up the lexer
From: rillig <rillig%NetBSD.org@localhost>
Date: Sun, 22 Jan 2023 22:46:47 +0000
details:   https://anonhg.NetBSD.org/src/rev/e3830bad9f47
branches:  trunk
changeset: 373162:e3830bad9f47
user:      rillig <rillig%NetBSD.org@localhost>
date:      Sun Jan 22 17:04:30 2023 +0000

description:
lint: clean up the lexer

No functional change.

diffstat:

 usr.bin/xlint/lint1/lex.c |  171 +++++++++++++++++----------------------------
 1 files changed, 64 insertions(+), 107 deletions(-)

diffs (truncated from 377 to 300 lines):

diff -r 0be20beac468 -r e3830bad9f47 usr.bin/xlint/lint1/lex.c
--- a/usr.bin/xlint/lint1/lex.c Sun Jan 22 16:05:08 2023 +0000
+++ b/usr.bin/xlint/lint1/lex.c Sun Jan 22 17:04:30 2023 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: lex.c,v 1.145 2023/01/22 16:05:08 rillig Exp $ */
+/* $NetBSD: lex.c,v 1.146 2023/01/22 17:04:30 rillig Exp $ */
 
 /*
  * Copyright (c) 1996 Christopher G. Demetriou.  All Rights Reserved.
@@ -38,7 +38,7 @@
 
 #include <sys/cdefs.h>
 #if defined(__RCSID)
-__RCSID("$NetBSD: lex.c,v 1.145 2023/01/22 16:05:08 rillig Exp $");
+__RCSID("$NetBSD: lex.c,v 1.146 2023/01/22 17:04:30 rillig Exp $");
 #endif
 
 #include <ctype.h>
@@ -70,7 +70,7 @@
 /*
  * Valid values for 'since' are 78, 90, 99, 11.
  *
- * As of 2022-04-30, lint treats 11 like 99, in order to provide good error
+ * The C11 keywords are added in C99 mode as well, to provide good error
  * messages instead of a simple parse error.  If the keyword '_Generic' were
  * not defined, it would be interpreted as an implicit function call, leading
  * to a parse error.
@@ -96,15 +96,15 @@
 
 /* During initialization, these keywords are written to the symbol table. */
 static const struct keyword {
-       const   char *kw_name;  /* keyword */
+       const   char *kw_name;
        int     kw_token;       /* token returned by yylex() */
-       scl_t   kw_scl;         /* storage class if kw_token T_SCLASS */
-       tspec_t kw_tspec;       /* type spec. if kw_token
+       scl_t   kw_scl;         /* storage class if kw_token is T_SCLASS */
+       tspec_t kw_tspec;       /* type spec if kw_token is
                                 * T_TYPE or T_STRUCT_OR_UNION */
-       tqual_t kw_tqual;       /* type qual. if kw_token T_QUAL */
-       bool    kw_c90:1;       /* C90 keyword */
-       bool    kw_c99_or_c11:1; /* C99 or C11 keyword */
-       bool    kw_gcc:1;       /* GCC keyword */
+       tqual_t kw_tqual;       /* type qualifier if kw_token is T_QUAL */
+       bool    kw_c90:1;       /* available in C90 mode */
+       bool    kw_c99_or_c11:1; /* available in C99 or C11 mode */
+       bool    kw_gcc:1;       /* available in GCC mode */
        bool    kw_plain:1;     /* 'name' */
        bool    kw_leading:1;   /* '__name' */
        bool    kw_both:1;      /* '__name__' */
@@ -112,8 +112,8 @@
        kwdef_keyword(  "_Alignas",     T_ALIGNAS),
        kwdef_keyword(  "_Alignof",     T_ALIGNOF),
        kwdef_token(    "alignof",      T_ALIGNOF,              78,0,6),
+       kwdef_token(    "asm",          T_ASM,                  78,1,7),
        kwdef_token(    "_Atomic",      T_ATOMIC,               11,0,1),
-       kwdef_token(    "asm",          T_ASM,                  78,1,7),
        kwdef_token(    "attribute",    T_ATTRIBUTE,            78,1,6),
        kwdef_sclass(   "auto",         AUTO,                   78,0,1),
        kwdef_type(     "_Bool",        BOOL,                   99),
@@ -178,11 +178,17 @@
 #undef kwdef_keyword
 };
 
-/* Symbol table */
-static sym_t   *symtab[HSHSIZ1];
+/*
+ * The symbol table containing all keywords, identifiers and labels. The hash
+ * entries are linked via sym_t.s_symtab_next.
+ */
+static sym_t *symtab[HSHSIZ1];
 
-/* type of next expected symbol */
-symt_t symtyp;
+/*
+ * The kind of the next expected symbol, to distinguish the namespaces of
+ * members, labels, type tags and other identifiers.
+ */
+symt_t symtyp;
 
 
 static unsigned int
@@ -272,7 +278,7 @@
 static void
 syms_add(struct syms *syms, const sym_t *sym)
 {
-       while (syms->len >= syms->cap) {
+       if (syms->len >= syms->cap) {
                syms->cap *= 2;
                syms->items = xrealloc(syms->items,
                    syms->cap * sizeof(syms->items[0]));
@@ -324,29 +330,28 @@
 static void
 add_keyword(const struct keyword *kw, bool leading, bool trailing)
 {
-       sym_t *sym;
-       char buf[256];
+
        const char *name;
-
        if (!leading && !trailing) {
                name = kw->kw_name;
        } else {
+               char buf[256];
                (void)snprintf(buf, sizeof(buf), "%s%s%s",
                    leading ? "__" : "", kw->kw_name, trailing ? "__" : "");
                name = xstrdup(buf);
        }
 
-       sym = block_zero_alloc(sizeof(*sym));
+       sym_t *sym = block_zero_alloc(sizeof(*sym));
        sym->s_name = name;
        sym->s_keyword = kw;
-       sym->u.s_keyword.sk_token = kw->kw_token;
-       if (kw->kw_token == T_TYPE || kw->kw_token == T_STRUCT_OR_UNION) {
+       int tok = kw->kw_token;
+       sym->u.s_keyword.sk_token = tok;
+       if (tok == T_TYPE || tok == T_STRUCT_OR_UNION)
                sym->u.s_keyword.sk_tspec = kw->kw_tspec;
-       } else if (kw->kw_token == T_SCLASS) {
+       if (tok == T_SCLASS)
                sym->s_scl = kw->kw_scl;
-       } else if (kw->kw_token == T_QUAL) {
+       if (tok == T_QUAL)
                sym->u.s_keyword.sk_qualifier = kw->kw_tqual;
-       }
 
        symtab_add(sym);
 }
@@ -374,17 +379,14 @@
        return true;
 }
 
-/*
- * All keywords are written to the symbol table. This saves us looking
- * in an extra table for each name we found.
- */
+/* Write all keywords to the symbol table. */
 void
 initscan(void)
 {
-       const struct keyword *kw, *end;
 
-       end = keywords + sizeof(keywords) / sizeof(keywords[0]);
-       for (kw = keywords; kw != end; kw++) {
+       size_t n = sizeof(keywords) / sizeof(keywords[0]);
+       for (size_t i = 0; i < n; i++) {
+               const struct keyword *kw = keywords + i;
                if (!is_keyword_known(kw))
                        continue;
                if (kw->kw_plain)
@@ -432,17 +434,9 @@
 }
 
 /*
- * Lex has found a letter followed by zero or more letters or digits.
- * It looks for a symbol in the symbol table with the same name. This
- * symbol must either be a keyword or a symbol of the type required by
- * symtyp (label, member, tag, ...).
- *
- * If it is a keyword, the token is returned. In some cases it is described
- * more deeply by data written to yylval.
- *
- * If it is a symbol, T_NAME is returned and the name is stored in yylval.
- * If there is already a symbol of the same name and type in the symbol
- * table, yylval.y_name->sb_sym points there.
+ * Look up the definition of a name in the symbol table. This symbol must
+ * either be a keyword or a symbol of the type required by symtyp (label,
+ * member, tag, ...).
  */
 extern int
 lex_name(const char *yytext, size_t yyleng)
@@ -470,10 +464,6 @@
 
 }
 
-/*
- * Convert a string representing an integer into internal representation.
- * Return T_CON, storing the numeric value in yylval, for yylex.
- */
 int
 lex_integer_constant(const char *yytext, size_t yyleng, int base)
 {
@@ -526,7 +516,6 @@
        typ = suffix_type[u_suffix][l_suffix];
 
        errno = 0;
-
        uq = (uint64_t)strtoull(cp, &eptr, base);
        lint_assert(eptr == cp + len);
        if (errno != 0) {
@@ -643,13 +632,6 @@
            : (int64_t)(q | ~vbits);
 }
 
-/*
- * Convert a string representing a floating point value into its numerical
- * representation. Type and value are returned in yylval.
- *
- * XXX Currently it is not possible to convert constants of type
- * long double which are greater than DBL_MAX.
- */
 int
 lex_floating_constant(const char *yytext, size_t yyleng)
 {
@@ -682,14 +664,15 @@
                warning(98);
        }
 
+       /* TODO: Handle precision and exponents of 'long double'. */
        errno = 0;
        d = strtod(cp, &eptr);
        if (eptr != cp + len) {
                switch (*eptr) {
                        /*
-                        * XXX: non-native non-current strtod() may not handle hex
-                        * floats, ignore the rest if we find traces of hex float
-                        * syntax...
+                        * XXX: Non-native non-current strtod() may not
+                        * handle hex floats, ignore the rest if we find
+                        * traces of hex float syntax.
                         */
                case 'p':
                case 'P':
@@ -851,8 +834,7 @@
  * Read a character which is part of a character constant or of a string
  * and handle escapes.
  *
- * The argument is the character which delimits the character constant or
- * string.
+ * 'delim' is '\'' for character constants and '"' for string literals.
  *
  * Returns -1 if the end of the character constant or string is reached,
  * -2 if the EOF is reached, and the character otherwise.
@@ -860,14 +842,13 @@
 static int
 get_escaped_char(int delim)
 {
-       int c;
 
-       if (prev_byte == -1) {
+       int c = prev_byte;
+       if (c != -1)
+               prev_byte = -1;
+       else
                c = read_byte();
-       } else {
-               c = prev_byte;
-               prev_byte = -1;
-       }
+
        if (c == delim)
                return -1;
        switch (c) {
@@ -1096,7 +1077,7 @@
 void
 lex_comment(void)
 {
-       int     c, lc;
+       int c;
        static const struct {
                const   char *keywd;
                bool    arg;
@@ -1125,9 +1106,8 @@
        char    arg[32];
        size_t  l, i;
        int     a;
-       bool    eoc;
 
-       eoc = false;
+       bool seen_end_of_comment = false;
 
        /* Skip whitespace after the start of the comment */
        while (c = read_byte(), isspace(c))
@@ -1173,37 +1153,27 @@
        while (isspace(c))
                c = read_byte();
 
-       if (c != '*' || (c = read_byte()) != '/') {
-               if (keywtab[i].func != linted)
-                       /* extra characters in lint comment */
-                       warning(257);
-       } else {
-               /*
-                * remember that we have already found the end of the
-                * comment
-                */
-               eoc = true;
-       }
+       seen_end_of_comment = c == '*' && (c = read_byte()) == '/';
+       if (!seen_end_of_comment && keywtab[i].func != linted)
+               /* extra characters in lint comment */
+               warning(257);
 
        if (keywtab[i].func != NULL)
-               (*keywtab[i].func)(a);
+               keywtab[i].func(a);
 
 skip_rest:
-       while (!eoc) {
Prev by Date: [xsrc/trunk]: xsrc/external/mit/fontconfig/dist revert workaround for lint
Next by Date: [src/trunk]: src tests/lint: merge tests for declaration after statement
Previous by Thread: [xsrc/trunk]: xsrc/external/mit/fontconfig/dist revert workaround for lint
Next by Thread: [src/trunk]: src tests/lint: merge tests for declaration after statement
Indexes:
Home | Main Index | Thread Index | Old Index