Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src/usr.bin/indent indent: clean up and document input handling
details: https://anonhg.NetBSD.org/src/rev/65cda4ad7e68
branches: trunk
changeset: 1026545:65cda4ad7e68
user: rillig <rillig%NetBSD.org@localhost>
date: Sun Nov 28 11:49:10 2021 +0000
description:
indent: clean up and document input handling
The transformation of moving comments from after an 'if (expr)' to after
the following brace has a large implementation cost (about 300 lines of
code) and makes input handling quite complicated. Document the overall
idea to save future readers some time.
No functional change.
diffstat:
usr.bin/indent/indent.c | 6 ++--
usr.bin/indent/indent.h | 4 +-
usr.bin/indent/io.c | 69 +++++++++++++++++++++++++++++++++++++-----------
3 files changed, 58 insertions(+), 21 deletions(-)
diffs (209 lines):
diff -r 29dcecd4c638 -r 65cda4ad7e68 usr.bin/indent/indent.c
--- a/usr.bin/indent/indent.c Sun Nov 28 10:11:15 2021 +0000
+++ b/usr.bin/indent/indent.c Sun Nov 28 11:49:10 2021 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: indent.c,v 1.237 2021/11/27 21:15:58 rillig Exp $ */
+/* $NetBSD: indent.c,v 1.238 2021/11/28 11:49:10 rillig Exp $ */
/*-
* SPDX-License-Identifier: BSD-4-Clause
@@ -43,7 +43,7 @@
#include <sys/cdefs.h>
#if defined(__NetBSD__)
-__RCSID("$NetBSD: indent.c,v 1.237 2021/11/27 21:15:58 rillig Exp $");
+__RCSID("$NetBSD: indent.c,v 1.238 2021/11/28 11:49:10 rillig Exp $");
#elif defined(__FreeBSD__)
__FBSDID("$FreeBSD: head/usr.bin/indent/indent.c 340138 2018-11-04 19:24:49Z oshogbo $");
#endif
@@ -322,7 +322,7 @@
}
debug_inp(__func__);
- inp_comment_rtrim();
+ inp_comment_rtrim_blank();
if (opt.swallow_optional_blanklines ||
(!comment_buffered && remove_newlines)) {
diff -r 29dcecd4c638 -r 65cda4ad7e68 usr.bin/indent/indent.h
--- a/usr.bin/indent/indent.h Sun Nov 28 10:11:15 2021 +0000
+++ b/usr.bin/indent/indent.h Sun Nov 28 11:49:10 2021 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: indent.h,v 1.105 2021/11/27 21:15:58 rillig Exp $ */
+/* $NetBSD: indent.h,v 1.106 2021/11/28 11:49:10 rillig Exp $ */
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
@@ -377,7 +377,7 @@
void inp_comment_add_range(const char *, const char *);
bool inp_comment_complete_block(void);
bool inp_comment_seen(void);
-void inp_comment_rtrim(void);
+void inp_comment_rtrim_blank(void);
void inp_comment_rtrim_newline(void);
void inp_comment_insert_lbrace(void);
diff -r 29dcecd4c638 -r 65cda4ad7e68 usr.bin/indent/io.c
--- a/usr.bin/indent/io.c Sun Nov 28 10:11:15 2021 +0000
+++ b/usr.bin/indent/io.c Sun Nov 28 11:49:10 2021 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: io.c,v 1.142 2021/11/27 21:15:58 rillig Exp $ */
+/* $NetBSD: io.c,v 1.143 2021/11/28 11:49:10 rillig Exp $ */
/*-
* SPDX-License-Identifier: BSD-4-Clause
@@ -43,7 +43,7 @@
#include <sys/cdefs.h>
#if defined(__NetBSD__)
-__RCSID("$NetBSD: io.c,v 1.142 2021/11/27 21:15:58 rillig Exp $");
+__RCSID("$NetBSD: io.c,v 1.143 2021/11/28 11:49:10 rillig Exp $");
#elif defined(__FreeBSD__)
__FBSDID("$FreeBSD: head/usr.bin/indent/io.c 334927 2018-06-10 16:44:18Z pstef $");
#endif
@@ -55,18 +55,41 @@
#include "indent.h"
+/*
+ * There are 3 modes for reading the input.
+ *
+ * default: In this mode, the input comes from the input file. The buffer
+ * 'inp' contains the current line, terminated with '\n'. The current read
+ * position is inp.s, and there is always inp.buf <= inp.s < inp.e. All other
+ * pointers are null.
+ *
+ * copy-in: After reading 'if (expr)' or similar tokens, the input still comes
+ * from 'inp', but instead of processing it, it is copied to 'save_com'. The
+ * goal of this mode is to move the comments after the '{', that is to
+ * transform 'if (expr) comment {' to 'if (expr) { comment'. When the next
+ * token cannot be part of this transformation, switch to copy-out.
+ *
+ * copy-out: In this mode, the input comes from 'save_com', which contains the
+ * tokens to be placed after the '{'. The input still comes from the range
+ * [inp.s, inp.e), but these two members have been overwritten with pointers
+ * into save_com_buf, so inp.buf and inp.s are unrelated, which is unusual.
+ * In this mode, inp.e[-1] is usually not terminated with '\n'. After reading
+ * all tokens from save_com, switch to default mode again.
+ */
static struct {
struct buffer inp; /* one line of input, ready to be split into
- * tokens; occasionally this buffer switches
+ * tokens; occasionally 's' and 'e' switch
* to save_com_buf */
char save_com_buf[5000]; /* input text is saved here when looking for
* the brace after an if, while, etc */
- char *save_com_s; /* start of the comment in save_com_buf */
- char *save_com_e; /* end of the comment in save_com_buf */
+ char *save_com_s; /* start of the comment in save_com_buf, or
+ * null */
+ char *save_com_e; /* end of the comment in save_com_buf, or
+ * null */
char *saved_inp_s; /* saved value of inp.s when taking input from
- * save_com */
- char *saved_inp_e; /* saved value of inp.e */
+ * save_com, or null */
+ char *saved_inp_e; /* saved value of inp.e, or null */
} inbuf;
static int paren_indent;
@@ -92,10 +115,6 @@
const char *
inp_line_start(void)
{
- /*
- * The comment we're about to read usually comes from inp.buf, unless it
- * has been copied into save_com.
- */
return inbuf.saved_inp_s != NULL ? inbuf.save_com_buf : inbuf.inp.buf;
}
@@ -149,6 +168,9 @@
void
debug_inp(const char *prefix)
{
+ assert(inp_line_start() <= inbuf.inp.s);
+ assert(inbuf.inp.s <= inbuf.inp.e);
+
debug_println("%s %s:", __func__, prefix);
if (inbuf.saved_inp_s == NULL)
debug_inp_buf("inp.buf", inbuf.inp.buf, inbuf.inp.s);
@@ -231,6 +253,10 @@
{
if (inbuf.save_com_e == NULL) { /* if this is the first comment, we
* must set up the buffer */
+ /*
+ * XXX: No space is reserved for a potential '{' here, unlike in
+ * inp_comment_init_comment.
+ */
inbuf.save_com_s = inbuf.save_com_buf;
inbuf.save_com_e = inbuf.save_com_s;
} else {
@@ -269,19 +295,25 @@
}
void
-inp_comment_rtrim(void)
+inp_comment_rtrim_blank(void)
{
- while (inbuf.save_com_e > inbuf.save_com_s && ch_isblank(inbuf.save_com_e[-1]))
+ while (inbuf.save_com_e > inbuf.save_com_s &&
+ ch_isblank(inbuf.save_com_e[-1]))
inbuf.save_com_e--;
}
void
inp_comment_rtrim_newline(void)
{
- while (inbuf.save_com_e > inbuf.save_com_s && inbuf.save_com_e[-1] == '\n')
+ while (inbuf.save_com_e > inbuf.save_com_s &&
+ inbuf.save_com_e[-1] == '\n')
inbuf.save_com_e--;
}
+/*
+ * Switch the input to come from save_com, replaying the copied tokens while
+ * looking for the next '{'.
+ */
void
inp_from_comment(void)
{
@@ -289,7 +321,7 @@
inbuf.saved_inp_s = inbuf.inp.s;
inbuf.saved_inp_e = inbuf.inp.e;
- inbuf.inp.s = inbuf.save_com_s; /* redirect lexi input to save_com_s */
+ inbuf.inp.s = inbuf.save_com_s;
inbuf.inp.e = inbuf.save_com_e;
inbuf.save_com_s = NULL;
inbuf.save_com_e = NULL;
@@ -521,6 +553,7 @@
output_char(line_terminator);
ps.stats.lines++;
+ /* TODO: rename to blank_line_after_decl */
if (ps.just_saw_decl == 1 && opt.blanklines_after_decl) {
blank_line_before = true;
ps.just_saw_decl = 0;
@@ -643,8 +676,8 @@
skip_blank(&p);
if (!skip_string(&p, "INDENT"))
return;
+
skip_blank(&p);
-
if (*p == '*' || skip_string(&p, "ON"))
on = true;
else if (skip_string(&p, "OFF"))
@@ -661,6 +694,10 @@
inhibit_formatting = !on;
if (on) {
+ /*
+ * XXX: Does this make sense? Is the handling of blank lines above
+ * INDENT OFF comments essentially the same?
+ */
blank_lines_to_output = 0;
blank_line_after = false;
blank_line_before = false;
Home |
Main Index |
Thread Index |
Old Index