Subject: bin/1996: Patch to add recursive options to grep(1)
To: None <gnats-bugs@NetBSD.ORG>
From: None <thieleke@icaen.uiowa.edu>
List: netbsd-bugs
Date: 01/30/1996 04:08:38
>Number: 1996
>Category: bin
>Synopsis: Patch to add recursive options to grep(1)
>Confidential: yes
>Severity: serious
>Priority: medium
>Responsible: bin-bug-people (Utility Bug People)
>State: open
>Class: sw-bug
>Submitter-Id: net
>Arrival-Date: Tue Jan 30 05:20:01 1996
>Last-Modified:
>Originator: Jeff Thieleke
>Organization:
noun: The state or manner of being organized.
>Release: <NetBSD-current source date>01/30/96
>Environment:
System: NetBSD picabo.icaen.uiowa.edu 1.1A NetBSD 1.1A (PICABO) #5: Tue Jan 16 08:52:54 CST 1996 root@picabo.icaen.uiowa.edu:/usr/src/sys/arch/amiga/compile/PICABO amiga
Architecture: amiga
>Description:
This patch file adds recursive directory searching features to the standard
grep, using the FTS functions. It was written by a poster to the FreeBSD
hackers mailing list - I have changed it to patch cleanly to our grep,
fixed a man page typo, and added the -r command line option.
There was a great deal of debate about this patch on the current-users
mailing list, with most of the opposition of the opinion that 'find | xargs'
does the same job, without adding additional code bloat.
In addition to the recursive searching options, this patch adds the -a
command line option, to prevent searching binary files. Reportedly,
grep'ing large binaries would cause grep to crash.
>How-To-Repeat:
not applicable
>Fix:
diff -rc grep/Makefile grep_r/Makefile
*** grep/Makefile Thu Jan 11 06:11:21 1996
--- grep_r/Makefile Mon Jan 22 15:49:32 1996
***************
*** 4,10 ****
SRCS= dfa.c grep.c getopt.c kwset.c obstack.c regex.c search.c
CFLAGS+=-DGREP -DHAVE_STRING_H=1 -DHAVE_SYS_PARAM_H=1 -DHAVE_UNISTD_H=1 \
-DHAVE_GETPAGESIZE=1 -DHAVE_MEMCHR=1 -DHAVE_STRERROR=1 \
! -DHAVE_VALLOC=1
LDADD+= -lgnumalloc
DPADD+= /usr/lib/libgnumalloc.a
--- 4,10 ----
SRCS= dfa.c grep.c getopt.c kwset.c obstack.c regex.c search.c
CFLAGS+=-DGREP -DHAVE_STRING_H=1 -DHAVE_SYS_PARAM_H=1 -DHAVE_UNISTD_H=1 \
-DHAVE_GETPAGESIZE=1 -DHAVE_MEMCHR=1 -DHAVE_STRERROR=1 \
! -DHAVE_VALLOC=1 -DHAVE_MMAP=1 -DHAVE_FTS=1
LDADD+= -lgnumalloc
DPADD+= /usr/lib/libgnumalloc.a
diff -rc grep/grep.1 grep_r/grep.1
*** grep/grep.1 Thu Jan 11 06:11:21 1996
--- grep_r/grep.1 Mon Jan 22 16:24:33 1996
***************
*** 4,25 ****
grep, egrep, fgrep \- print lines matching a pattern
.SH SYNOPOSIS
.B grep
! [
! .BR \- [[ AB "] ]\c"
! .I "num"
! ]
! [
! .BR \- [ CEFGVBchilnsvwx ]
! ]
! [
! .B \-e
! ]
! .I pattern
! |
! .BI \-f file
! ] [
! .I files...
! ]
.SH DESCRIPTION
.PP
.B Grep
--- 4,16 ----
grep, egrep, fgrep \- print lines matching a pattern
.SH SYNOPOSIS
.B grep
! [\-[AB] num]
! [\-HPRrS]
! [\-CEFGLVabchilnqsvwx]
! [\-e expr]
! [\-f file]
! files...
!
.SH DESCRIPTION
.PP
.B Grep
***************
*** 103,108 ****
--- 94,102 ----
to standard error. This version number should
be included in all bug reports (see below).
.TP
+ .B \-a
+ Don't search in binary files.
+ .TP
.B \-b
Print the byte offset within the input file before
each line of output.
***************
*** 169,174 ****
--- 163,204 ----
.TP
.B \-x
Select only those matches that exactly match the whole line.
+
+ .PP
+ Following options only avaible if compiled with FTS library:
+ .PD 0
+ .TP
+ .BI \-H
+ If the
+ .I \-R
+ option is specified, symbolic links on the command line
+ are followed. (Symbolic links encountered in the tree traversal
+ are not followed.)
+ .TP
+
+ .BI \-P
+ If the
+ .I \-R
+ option is specified, no symbolic links are followed.
+ .TP
+
+ .BI \-R
+ Search in the file hierarchies
+ rooted in the files instead of just the files themselves.
+ .TP
+
+ .BI \-r
+ Equivalent to
+ .BR \-RS .
+ .TP
+
+ .BI \-S
+ If the
+ .I \-R
+ option is specified, all symbolic links are followed.
+ .TP
+
+
.PD
.SH "REGULAR EXPRESSIONS"
.PP
diff -rc grep/grep.c grep_r/grep.c
*** grep/grep.c Thu Jan 11 06:11:21 1996
--- grep_r/grep.c Mon Jan 22 16:49:05 1996
***************
*** 57,62 ****
--- 57,63 ----
#include <unistd.h>
#else
#define O_RDONLY 0
+ #define STDIN_FILENO 0
extern int open(), read(), close();
#endif
***************
*** 94,99 ****
--- 95,110 ----
return 0;
}
#endif
+
+ /* traverse a file hierarchy library */
+ #ifdef HAVE_FTS
+ #include <sys/types.h>
+ #include <sys/stat.h>
+ #include <fts.h>
+ #endif
+
+ /* don't search in binary files */
+ int aflag;
/* Define flags declared in grep.h. */
char *matcher;
***************
*** 500,505 ****
--- 511,550 ----
return nlines;
}
+
+ /*
+ * try to guess if fd belong to a binary file
+ */
+
+ int isBinaryFile(fd)
+ int fd;
+ {
+ #define BINARY_BUF_LEN 32
+ static unsigned char buf[BINARY_BUF_LEN];
+ int i, n;
+
+ /* pipe, socket, fifo */
+ if (lseek(fd, (off_t)0, SEEK_SET) == (off_t)-1)
+ return(0);
+
+ if ((n =(int) read(fd, buf, (size_t)BINARY_BUF_LEN)) == -1)
+ return(0);
+
+ /* look for non-printable chars */
+ for(i = 0; i < n; i++)
+ if (!isprint(buf[i]) && !isspace(buf[i]))
+ return(1);
+
+ /* reset fd to begin of file */
+ if (lseek(fd, (off_t)0, SEEK_SET) == (off_t)-1)
+ return(0);
+
+
+ return(0);
+ }
+
+
+
/* Search a given file. Return a count of lines printed. */
static int
grep(fd)
***************
*** 509,514 ****
--- 554,563 ----
size_t residue, save;
char *beg, *lim;
+ /* skip binary files */
+ if (aflag && isBinaryFile(fd))
+ return(0);
+
reset(fd);
totalcc = 0;
***************
*** 569,576 ****
static char version[] = "GNU grep version 2.0";
#define USAGE \
! "usage: %s [-[[AB] ]<num>] [-[CEFGVchilnqsvwx]] [-[ef]] <expr> [<files...>]\n"
static void
usage()
--- 618,632 ----
static char version[] = "GNU grep version 2.0";
+ #ifdef HAVE_FTS
+ #define USAGE \
+ "usage: %s [-[AB] <num>] [-HPRrS] [-CEFGLVabchilnqsvwx]\n\
+ [-e <expr>] [-f file] [files ...]\n"
+ #else
#define USAGE \
! "usage: %s [-[AB] <num>] [-CEFGLVabchilnqsvwx]\n\
! [-e <expr>] [-f file] [files ...]\n"
! #endif
static void
usage()
***************
*** 609,614 ****
--- 665,676 ----
FILE *fp;
extern char *optarg;
extern int optind;
+ #ifdef HAVE_FTS
+ int Rflag, Hflag, Pflag, Lflag;
+ FTS *ftsp;
+ FTSENT *ftsent;
+ int fts_options;
+ #endif
prog = argv[0];
if (prog && strrchr(prog, '/'))
***************
*** 622,630 ****
list_files = 0;
suppress_errors = 0;
matcher = NULL;
! while ((opt = getopt(argc, argv, "0123456789A:B:CEFGVX:bce:f:hiLlnoqsvwxy"))
! != EOF)
switch (opt)
{
case '0':
--- 684,704 ----
list_files = 0;
suppress_errors = 0;
matcher = NULL;
+ aflag = 0;
+ #ifdef HAVE_FTS
+ Rflag = Hflag = Pflag = Lflag = 0;
+ #endif
+
+ while ((opt = getopt(argc, argv,
+
+ #ifndef HAVE_FTS
+ "0123456789A:B:CEFGVX:abce:f:hiLlnqsvwxy"
+ #else
+ "0123456789A:B:CEFGHLPRSVX:abce:f:hiLlnqrsvwxy?"
+ #endif
! )) != EOF)
!
switch (opt)
{
case '0':
***************
*** 676,681 ****
--- 750,788 ----
fatal("matcher already specified", 0);
matcher = optarg;
break;
+
+ #ifdef HAVE_FTS
+ /* symbolic links on the command line are followed */
+ case 'H':
+ Hflag = 1;
+ Lflag = Pflag = 0;
+ break;
+
+ /* no symbolic links are followed */
+ case 'P':
+ Pflag = 1;
+ Hflag = Lflag = 0;
+ break;
+
+ /* traverse file hierarchies */
+ case 'R':
+ Rflag = 1;
+ break;
+
+ /* all symbolic links are followed */
+ case 'S':
+ Lflag = 1;
+ Hflag = Pflag = 0;
+ break;
+
+ /* equivalent to -RS */
+ case 'r':
+ Lflag = Rflag = 1;
+ Hflag = Pflag = 0;
+ #endif
+ case 'a':
+ aflag = 1;
+ break;
case 'b':
out_byte = 1;
break;
***************
*** 776,787 ****
--- 883,1008 ----
(*compile)(keys, keycc);
+ #ifndef HAVE_FTS
if (argc - optind > 1 && !no_filenames)
+ #else
+ if ((argc - optind > 1 || Rflag) && !no_filenames)
+ #endif
out_file = 1;
status = 1;
+ #if HAVE_FTS
+ if (Rflag) {
+ fts_options = FTS_PHYSICAL | FTS_NOCHDIR;
+
+ if (Hflag)
+ fts_options |= FTS_COMFOLLOW;
+
+ if (Lflag) {
+ fts_options |= FTS_LOGICAL;
+ fts_options &= ~FTS_PHYSICAL;
+ }
+
+ if (Pflag) {
+ fts_options &= ~FTS_LOGICAL & ~FTS_COMFOLLOW;
+ fts_options |= FTS_PHYSICAL;
+ }
+ }
+
+ if (Rflag && optind < argc) {
+ int i;
+
+ /* replace "-" with "/dev/stdin" */
+ for (i = optind; i < argc; i++)
+ if (strcmp(argv[i], "-") == 0)
+ *(argv + i) = "/dev/stdin";
+
+ if ((ftsp = fts_open(argv + optind, fts_options,
+ (int(*)())NULL)) == NULL) {
+ if (!suppress_errors)
+ error("", errno);
+ } else {
+
+ while((ftsent = fts_read(ftsp)) != NULL) {
+ filename = ftsent->fts_accpath;
+
+ switch(ftsent->fts_info) {
+
+ /* regular file */
+ case FTS_F:
+ break;
+
+ /* directory */
+ case FTS_D:
+ case FTS_DC:
+ case FTS_DP:
+ continue; break;
+
+ /* errors */
+ case FTS_DNR:
+ error(filename, errno);
+ continue; break;
+
+ case FTS_ERR:
+ case FTS_NS:
+ error(filename, ftsent->fts_errno);
+ continue; break;
+
+ /* dead symlink */
+ case FTS_SLNONE:
+ continue; break;
+
+ /* symlink, don't skip */
+ case FTS_SL:
+ break;
+
+ default:
+ /*
+ if (!suppress_errors)
+ fprintf(stderr, "%s: ignored\n", filename);
+ continue; break;
+ */
+
+ }
+
+ if ((desc = open(filename, O_RDONLY)) == -1) {
+ error(filename, errno);
+ continue;
+ }
+
+ count = grep(desc);
+ if (count_matches)
+ {
+ if (out_file)
+ printf("%s:", filename);
+ printf("%d\n", count);
+ }
+ if (count)
+ {
+ status = 0;
+ if (list_files == 1)
+ printf("%s\n", filename);
+ }
+ else if (list_files == -1)
+ printf("%s\n", filename);
+
+ if (desc != STDIN_FILENO)
+ close(desc);
+ }
+
+ if (fts_close(ftsp) == -1)
+ error("fts_close", errno);
+ }
+
+ /* ! Rflag */
+ } else
+
+ #endif /* HAVE_FTS */
+
+ /* search in file names from arguments, not from stdin */
if (optind < argc)
+
while (optind < argc)
{
desc = strcmp(argv[optind], "-") ? open(argv[optind], O_RDONLY) : 0;
***************
*** 813,818 ****
--- 1034,1041 ----
close(desc);
++optind;
}
+
+ /* read input from stdin */
else
{
filename = "(standard input)";
>Audit-Trail:
>Unformatted:
no
Patch that adds recursive file traversal features to grep(1)
non-critical
low
change-request