Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src/usr.bin/grep Replace usr.bin/grep with the BSD grep implementation from FreeBSD.
details: https://anonhg.NetBSD.org/src/rev/da6240cbf8d7
branches: trunk
changeset: 762111:da6240cbf8d7
user: joerg <joerg%NetBSD.org@localhost>
date: Wed Feb 16 01:31:33 2011 +0000
description:
Replace usr.bin/grep with the BSD grep implementation from FreeBSD.
diffstat:
usr.bin/grep/Makefile | 27 +-
usr.bin/grep/TODO | 33 -
usr.bin/grep/binary.c | 99 ---
usr.bin/grep/fastgrep.c | 333 +++++++++++++
usr.bin/grep/file.c | 374 +++++++------
usr.bin/grep/grep.1 | 552 ++++++++++++++-------
usr.bin/grep/grep.c | 889 ++++++++++++++++++++--------------
usr.bin/grep/grep.h | 190 ++++---
usr.bin/grep/mmfile.c | 110 ----
usr.bin/grep/nls/C.msg | 13 +
usr.bin/grep/nls/es_ES.ISO8859-1.msg | 13 +
usr.bin/grep/nls/gl_ES.ISO8859-1.msg | 13 +
usr.bin/grep/nls/hu_HU.ISO8859-2.msg | 13 +
usr.bin/grep/nls/ja_JP.SJIS.msg | 13 +
usr.bin/grep/nls/ja_JP.UTF-8.msg | 13 +
usr.bin/grep/nls/ja_JP.eucJP.msg | 13 +
usr.bin/grep/nls/pt_BR.ISO8859-1.msg | 13 +
usr.bin/grep/nls/ru_RU.KOI8-R.msg | 13 +
usr.bin/grep/nls/uk_UA.UTF-8.msg | 12 +
usr.bin/grep/nls/zh_CN.UTF-8.msg | 13 +
usr.bin/grep/queue.c | 92 +--
usr.bin/grep/util.c | 505 ++++++++++++-------
22 files changed, 2040 insertions(+), 1306 deletions(-)
diffs (truncated from 4019 to 300 lines):
diff -r 2ee9d191c02d -r da6240cbf8d7 usr.bin/grep/Makefile
--- a/usr.bin/grep/Makefile Tue Feb 15 23:17:02 2011 +0000
+++ b/usr.bin/grep/Makefile Wed Feb 16 01:31:33 2011 +0000
@@ -1,20 +1,37 @@
-# $NetBSD: Makefile,v 1.3 2009/04/14 22:15:20 lukem Exp $
+# $NetBSD: Makefile,v 1.4 2011/02/16 01:31:33 joerg Exp $
+# $FreeBSD: head/usr.bin/grep/Makefile 210389 2010-07-22 19:11:57Z gabor $
+# $OpenBSD: Makefile,v 1.6 2003/06/25 15:00:04 millert Exp $
PROG= grep
-SRCS= binary.c file.c grep.c mmfile.c queue.c util.c
+SRCS= fastgrep.c file.c grep.c queue.c util.c
-LINKS= ${BINDIR}/grep ${BINDIR}/egrep \
+LINKS= ${BINDIR}/grep ${BINDIR}/egrep \
${BINDIR}/grep ${BINDIR}/fgrep \
${BINDIR}/grep ${BINDIR}/zgrep \
${BINDIR}/grep ${BINDIR}/zegrep \
${BINDIR}/grep ${BINDIR}/zfgrep
-MLINKS= grep.1 egrep.1 \
+MLINKS= grep.1 egrep.1 \
grep.1 fgrep.1 \
grep.1 zgrep.1 \
grep.1 zegrep.1 \
grep.1 zfgrep.1
-LDADD= -lz
+LDADD= -lz -lbz2
+DPADD= ${LIBZ} ${LIBBZ2}
+
+.PATH: ${.CURDIR}/nls
+
+NLS= C.msg \
+ es_ES.ISO8859-1.msg \
+ gl_ES.ISO8859-1.msg \
+ hu_HU.ISO8859-2.msg \
+ ja_JP.eucJP.msg \
+ ja_JP.SJIS.msg \
+ ja_JP.UTF-8.msg \
+ pt_BR.ISO8859-1.msg \
+ ru_RU.KOI8-R.msg \
+ uk_UA.UTF-8.msg \
+ zh_CN.UTF-8.msg
.include <bsd.prog.mk>
diff -r 2ee9d191c02d -r da6240cbf8d7 usr.bin/grep/TODO
--- a/usr.bin/grep/TODO Tue Feb 15 23:17:02 2011 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,33 +0,0 @@
-
-$NetBSD: TODO,v 1.4 2006/04/08 23:56:39 wiz Exp $
-
-Hopefully this program can become a full drop-in replacement for
-GNU grep. If you want to help out, please let me (cjep@) know so that
-we can organise our efforts efficiently.
-
-1. Add functionality and maybe change flags to match GNU grep.
- * --include,--exclude
- possibly use code from pax for this.
-
-2. Binary file detection needs to be better (as currently this grep thinks
-its own source code is binary...). This implementation looks at the
-first few bytes to determine whether a file is binary. GNU grep seems
-to search for a byte worth 0 or 128 (depending on -z).
-
-(3. Merge in improvements from OpenBSD. Mostly done. Main improvement left
- is the speed up for simple regex's.)
-
-(4. Make code style more consistent with the NetBSD source tree. Have done
- a few fixes. Could probably do with more.)
-
-5. Maybe revisit symbolic link handling and -S, -P.
-
-6. Sort out any performance issues, e.g.
- i) this is slower than GNU grep;
- ii) we probably stat wastefully.
-
-7. Fix the manual page.
-
-8. Possible look at regex libc speedups from FreeBSD.
-
-9. The -v option is currently broken (i.e. does not invert the logic).
diff -r 2ee9d191c02d -r da6240cbf8d7 usr.bin/grep/binary.c
--- a/usr.bin/grep/binary.c Tue Feb 15 23:17:02 2011 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,99 +0,0 @@
-/* $NetBSD: binary.c,v 1.3 2005/04/22 21:02:42 christos Exp $ */
-
-/*-
- * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- */
-
-#include <sys/cdefs.h>
-#ifndef lint
-__RCSID("$NetBSD: binary.c,v 1.3 2005/04/22 21:02:42 christos Exp $");
-#endif /* not lint */
-
-#include <ctype.h>
-#include <stdio.h>
-#include <zlib.h>
-
-#include "grep.h"
-
-#define BUFFER_SIZE 128
-
-static inline int
-okchar(unsigned char c)
-{
- return isprint(c) || isspace(c) || c == line_endchar;
-}
-
-int
-bin_file(FILE *f)
-{
- unsigned char buf[BUFFER_SIZE];
- size_t i, m;
-
- if (fseek(f, 0L, SEEK_SET) == -1)
- return 0;
-
- if ((m = fread(buf, 1, BUFFER_SIZE, f)) == 0)
- return 0;
-
- for (i = 0; i < m; i++)
- if (!okchar(buf[i]))
- return 1;
-
- rewind(f);
- return 0;
-}
-
-int
-gzbin_file(gzFile *f)
-{
- unsigned char buf[BUFFER_SIZE];
- int i, m;
-
- if (gzseek(f, 0L, SEEK_SET) == -1)
- return 0;
-
- if ((m = gzread(f, buf, BUFFER_SIZE)) <= 0)
- return 0;
-
- for (i = 0; i < m; i++)
- if (!okchar(buf[i]))
- return 1;
-
- gzrewind(f);
- return 0;
-}
-
-int
-mmbin_file(mmf_t *f)
-{
- size_t i;
- /* XXX knows too much about mmf internals */
- for (i = 0; i < BUFFER_SIZE && i < f->len; i++)
- if (!okchar(f->base[i]))
- return 1;
- mmrewind(f);
- return 0;
-}
diff -r 2ee9d191c02d -r da6240cbf8d7 usr.bin/grep/fastgrep.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr.bin/grep/fastgrep.c Wed Feb 16 01:31:33 2011 +0000
@@ -0,0 +1,333 @@
+/* $OpenBSD: util.c,v 1.36 2007/10/02 17:59:18 otto Exp $ */
+/* $FreeBSD: head/usr.bin/grep/fastgrep.c 211496 2010-08-19 09:28:59Z des $ */
+
+/*-
+ * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
+ * Copyright (C) 2008 Gabor Kovesdan <gabor%FreeBSD.org@localhost>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * XXX: This file is a speed up for grep to cover the defects of the
+ * regex library. These optimizations should practically be implemented
+ * there keeping this code clean. This is a future TODO, but for the
+ * meantime, we need to use this workaround.
+ */
+
+#include <sys/cdefs.h>
+__RCSID("$NetBSD: fastgrep.c,v 1.1 2011/02/16 01:31:33 joerg Exp $");
+
+#include <limits.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include <wchar.h>
+#include <wctype.h>
+
+#include "grep.h"
+
+static inline int grep_cmp(const unsigned char *, const unsigned char *, size_t);
+static inline void grep_revstr(unsigned char *, int);
+
+void
+fgrepcomp(fastgrep_t *fg, const char *pat)
+{
+ unsigned int i;
+
+ /* Initialize. */
+ fg->len = strlen(pat);
+ fg->bol = false;
+ fg->eol = false;
+ fg->reversed = false;
+
+ fg->pattern = grep_malloc(strlen(pat) + 1);
+ strcpy(fg->pattern, pat);
+
+ /* Preprocess pattern. */
+ for (i = 0; i <= UCHAR_MAX; i++)
+ fg->qsBc[i] = fg->len;
+ for (i = 1; i < fg->len; i++)
+ fg->qsBc[fg->pattern[i]] = fg->len - i;
+}
+
+/*
+ * Returns: -1 on failure, 0 on success
+ */
+int
+fastcomp(fastgrep_t *fg, const char *pat)
+{
+ unsigned int i;
+ int firstHalfDot = -1;
+ int firstLastHalfDot = -1;
+ int hasDot = 0;
+ int lastHalfDot = 0;
+ int shiftPatternLen;
+ bool bol = false;
+ bool eol = false;
+
+ /* Initialize. */
+ fg->len = strlen(pat);
+ fg->bol = false;
+ fg->eol = false;
+ fg->reversed = false;
+
+ /* Remove end-of-line character ('$'). */
+ if (fg->len > 0 && pat[fg->len - 1] == '$') {
+ eol = true;
+ fg->eol = true;
+ fg->len--;
+ }
+
+ /* Remove beginning-of-line character ('^'). */
+ if (pat[0] == '^') {
+ bol = true;
+ fg->bol = true;
+ fg->len--;
+ }
+
+ if (fg->len >= 14 &&
+ strncmp(pat + (fg->bol ? 1 : 0), "[[:<:]]", 7) == 0 &&
Home | Main Index | Thread Index | Old Index