Subject: Re: find(1) with extended regexps
To: Luke Mewburn <lukem@NetBSD.org>
From: Daniel de Kok <danieldk@pobox.com>
List: tech-userlevel
Date: 07/18/2007 13:16:23
--4Ckj6UjgE2iN1+kY
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
On Wed, Jul 18, 2007 at 12:17:15PM +1000, Luke Mewburn wrote:
> On Tue, Jul 17, 2007 at 03:07:09PM +0200, Daniel de Kok wrote:
> | Any objections to committing the attached patch for adding the '-E'
> | option to find(1) to interpret regexps as extended regexps like grep
> | and sed do? FreeBSD also seems to provide this functionality, and
> | it is pretty handy, without having many downsides.
>
> The idea in principle sounds good to me.
>
> You should remove REG_BASIC from the argument list given to c_regex_common()
> by various functions that call it.
Sorry for missing that one. Since the only thing callers still need to
select is case-sensitive versus case-insensitive matching, I guess a
bool parameter is also more appropriate for this.
If no one objects, I'll commit the attached patch.
-- Daniel
--4Ckj6UjgE2iN1+kY
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="find-extended-regex.diff"
Index: extern.h
===================================================================
RCS file: /cvsroot/src/usr.bin/find/extern.h,v
retrieving revision 1.27
diff -b -u -r1.27 extern.h
--- extern.h 6 Feb 2007 13:25:01 -0000 1.27
+++ extern.h 18 Jul 2007 11:08:38 -0000
@@ -95,4 +95,5 @@
PLAN *c_or(char ***, int);
PLAN *c_null(char ***, int);
-extern int ftsoptions, isdeprecated, isdepth, isoutput, issort, isxargs;
+extern int ftsoptions, isdeprecated, isdepth, isoutput, issort, isxargs,
+ regcomp_flags;
Index: find.1
===================================================================
RCS file: /cvsroot/src/usr.bin/find/find.1,v
retrieving revision 1.65
diff -b -u -r1.65 find.1
--- find.1 8 Mar 2007 21:23:00 -0000 1.65
+++ find.1 18 Jul 2007 11:08:38 -0000
@@ -32,7 +32,7 @@
.\"
.\" from: @(#)find.1 8.7 (Berkeley) 5/9/95
.\"
-.Dd February 8, 2007
+.Dd July 17, 2007
.Dt FIND 1
.Os
.Sh NAME
@@ -41,13 +41,13 @@
.Sh SYNOPSIS
.Nm
.Op Fl H | Fl L | Fl P
-.Op Fl dhsXx
+.Op Fl dEhsXx
.Ar file
.Op Ar file ...
.Op Ar expression
.Nm
.Op Fl H | Fl L | Fl P
-.Op Fl dhsXx
+.Op Fl dEhsXx
.Fl f Ar file
.Op Ar file ...
.Op Ar expression
@@ -107,6 +107,14 @@
Note, the default is
.Ar not
a breadth-first traversal.
+.It Fl E
+The
+.Fl E
+option causes
+.Ar regexp
+arguments to primaries to be interpreted as extended regular
+expressions (see
+.Xr re_format 7 ) .
.It Fl f
The
.Fl f
Index: function.c
===================================================================
RCS file: /cvsroot/src/usr.bin/find/function.c,v
retrieving revision 1.62
diff -b -u -r1.62 function.c
--- function.c 6 Feb 2007 13:25:01 -0000 1.62
+++ function.c 18 Jul 2007 11:08:38 -0000
@@ -55,6 +55,7 @@
#include <inttypes.h>
#include <limits.h>
#include <pwd.h>
+#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -119,7 +120,7 @@
int f_user(PLAN *, FTSENT *);
int f_not(PLAN *, FTSENT *);
int f_or(PLAN *, FTSENT *);
-static PLAN *c_regex_common(char ***, int, enum ntype, int);
+static PLAN *c_regex_common(char ***, int, enum ntype, bool);
static PLAN *palloc(enum ntype, int (*)(PLAN *, FTSENT *));
extern int dotfd;
@@ -1559,7 +1560,7 @@
}
static PLAN *
-c_regex_common(char ***argvp, int isok, enum ntype type, int regcomp_flags)
+c_regex_common(char ***argvp, int isok, enum ntype type, bool icase)
{
char errbuf[LINE_MAX];
regex_t reg;
@@ -1578,7 +1579,8 @@
snprintf(lineregexp, len, "^%s(%s%s)$",
(regcomp_flags & REG_EXTENDED) ? "" : "\\", regexp,
(regcomp_flags & REG_EXTENDED) ? "" : "\\");
- rv = regcomp(&reg, lineregexp, REG_NOSUB|regcomp_flags);
+ rv = regcomp(&reg, lineregexp, REG_NOSUB|regcomp_flags|
+ (icase ? REG_ICASE : 0));
free(lineregexp);
if (rv != 0) {
regerror(rv, &reg, errbuf, sizeof errbuf);
@@ -1594,14 +1596,14 @@
c_regex(char ***argvp, int isok)
{
- return (c_regex_common(argvp, isok, N_REGEX, REG_BASIC));
+ return (c_regex_common(argvp, isok, N_REGEX, false));
}
PLAN *
c_iregex(char ***argvp, int isok)
{
- return (c_regex_common(argvp, isok, N_IREGEX, REG_BASIC|REG_ICASE));
+ return (c_regex_common(argvp, isok, N_IREGEX, true));
}
/*
Index: main.c
===================================================================
RCS file: /cvsroot/src/usr.bin/find/main.c,v
retrieving revision 1.26
diff -b -u -r1.26 main.c
--- main.c 9 Nov 2006 20:50:53 -0000 1.26
+++ main.c 18 Jul 2007 11:08:38 -0000
@@ -68,6 +68,7 @@
int isoutput; /* user specified output operator */
int issort; /* sort directory entries */
int isxargs; /* don't permit xargs delimiting chars */
+int regcomp_flags = REG_BASIC; /* regex compilation flags */
int main(int, char **);
static void usage(void);
@@ -93,7 +94,7 @@
err(1, NULL);
ftsoptions = FTS_NOSTAT | FTS_PHYSICAL;
- while ((ch = getopt(argc, argv, "HLPXdf:hsx")) != -1)
+ while ((ch = getopt(argc, argv, "HLPdEf:hsXx")) != -1)
switch (ch) {
case 'H':
ftsoptions &= ~FTS_LOGICAL;
@@ -107,12 +108,12 @@
ftsoptions &= ~(FTS_COMFOLLOW|FTS_LOGICAL);
ftsoptions |= FTS_PHYSICAL;
break;
- case 'X':
- isxargs = 1;
- break;
case 'd':
isdepth = 1;
break;
+ case 'E':
+ regcomp_flags = REG_EXTENDED;
+ break;
case 'f':
*p++ = optarg;
break;
@@ -123,6 +124,9 @@
case 's':
issort = 1;
break;
+ case 'X':
+ isxargs = 1;
+ break;
case 'x':
ftsoptions |= FTS_XDEV;
break;
@@ -164,6 +168,6 @@
{
(void)fprintf(stderr,
-"usage: find [-H | -L | -P] [-Xdhsx] [-f file] file [file ...] [expression]\n");
+"usage: find [-H | -L | -P] [-dEhsXx] [-f file] file [file ...] [expression]\n");
exit(1);
}
--4Ckj6UjgE2iN1+kY--