Subject: Re: tar --exclude
To: None <tech-userlevel@netbsd.org>
From: Christos Zoulas <christos@zoulas.com>
List: tech-userlevel
Date: 12/05/2002 18:46:09
In article <87k7ioh3bf.wl@orinoco.my.domain>,
<hiramatu@boreas.dti.ne.jp> wrote:
>-=-=-=-=-=-
>
>Hi,
>
>From man page of GNU tar,
>
> --exclude pattern Exclude files matching the pattern (don't extract
> them, don't add them, don't list them).
>
>I think this option is useful, and here is my attempt to add --exclude
>to our tar.
>
>Thanks in advance for comments.
Thanks a lot for the patch. I've made some minor adjustments to it, to
simplify the code and minimize the diffs.
christos
Index: ar_subs.c
===================================================================
RCS file: /cvsroot/basesrc/bin/pax/ar_subs.c,v
retrieving revision 1.20
diff -u -u -r1.20 ar_subs.c
--- ar_subs.c 2002/10/18 11:54:22 1.20
+++ ar_subs.c 2002/12/05 18:44:24
@@ -50,6 +50,7 @@
#include <sys/time.h>
#include <sys/stat.h>
#include <sys/param.h>
+#include <assert.h>
#include <signal.h>
#include <string.h>
#include <stdio.h>
@@ -62,7 +63,8 @@
#include "pax.h"
#include "extern.h"
-static void wr_archive(ARCHD *, int is_app);
+static void wr_archive(ARCHD *, int);
+static int match(ARCHD *);
static int get_arc(void);
static int next_head(ARCHD *);
extern sigset_t s_mask;
@@ -76,6 +78,23 @@
u_long flcnt; /* number of files processed */
ARCHD archd;
+static int
+match(arcn) /* run the EXCLUDE pattern check, then the INCLUDE one; yields a pat_match() result */
+ ARCHD *arcn;
+{
+ int res;
+ switch (res = pat_match(arcn, EXCLUDE)) { /* the exclude list is consulted first */
+ case DONE:
+ case SKIP:
+ return res; /* exclude pass settled it; the include list is not consulted */
+ case PROCEED:
+ return pat_match(arcn, INCLUDE); /* not excluded: the include list decides */
+ default:
+ assert(res == DONE); /* always false here: traps a result outside SKIP/PROCEED/DONE */
+ return DONE; /* reached only when assert() is compiled out (NDEBUG) */
+ }
+}
+
/*
* list()
* list the contents of an archive which match user supplied pattern(s)
@@ -125,10 +144,10 @@
* check for pattern, and user specified options match.
* When all patterns are matched we are done.
*/
- if ((res = pat_match(arcn)) < 0)
+ if ((res = match(arcn)) == DONE)
break;
- if ((res == 0) && (sel_chk(arcn) == 0)) {
+ if ((res == PROCEED) && (sel_chk(arcn) == 0)) {
/*
* pattern resulted in a selected file
*/
@@ -219,10 +238,10 @@
* check for pattern, and user specified options match. When
* all the patterns are matched we are done
*/
- if ((res = pat_match(arcn)) < 0)
+ if ((res = match(arcn)) == DONE)
break;
- if ((res > 0) || (sel_chk(arcn) != 0)) {
+ if ((res == SKIP) || (sel_chk(arcn) != 0)) {
/*
* file is not selected. skip past any file
* data and padding and go back for the next
Index: extern.h
===================================================================
RCS file: /cvsroot/basesrc/bin/pax/extern.h,v
retrieving revision 1.35
diff -u -u -r1.35 extern.h
--- extern.h 2002/10/17 00:32:36 1.35
+++ extern.h 2002/12/05 18:44:24
@@ -209,10 +209,18 @@
* pat_rep.c
*/
int rep_add(char *);
-int pat_add(char *, char *);
+#define INCLUDE 0
+#define EXCLUDE 1
+int pat_add(char *, char *, int);
+void invert_patterns(void);
void pat_chk(void);
+#define SELECT_OK 0
+#define SELECT_NG 1
int pat_sel(ARCHD *);
-int pat_match(ARCHD *);
+#define SKIP 0
+#define PROCEED 1
+#define DONE 2
+int pat_match(ARCHD *, int);
int mod_name(ARCHD *);
int set_dest(ARCHD *, char *, int);
Index: options.c
===================================================================
RCS file: /cvsroot/basesrc/bin/pax/options.c,v
retrieving revision 1.55
diff -u -u -r1.55 options.c
--- options.c 2002/10/18 13:45:05 1.55
+++ options.c 2002/12/05 18:44:24
@@ -639,7 +639,7 @@
case LIST:
case EXTRACT:
for (; optind < argc; optind++)
- if (pat_add(argv[optind], NULL) < 0)
+ if (pat_add(argv[optind], NULL, INCLUDE) < 0)
pax_usage();
break;
case COPY:
@@ -663,6 +663,9 @@
maxflt = 0;
break;
}
+ if (cflag) {
+ invert_patterns();
+ }
}
@@ -701,6 +704,8 @@
{ "absolute-paths", no_argument, 0, 'P' },
{ "files-from", required_argument, 0, 'T' },
{ "exclude-from", required_argument, 0, 'X' },
+ { "exclude", required_argument, 0,
+ OPT_EXCLUDE },
{ "compress", no_argument, 0, 'Z' },
{ "uncompress", no_argument, 0, 'Z' },
{ "strict", no_argument, 0,
@@ -754,8 +759,6 @@
{ "version", no_argument, 0,
OPT_VERSION },
{ "verify", no_argument, 0, 'W' },
- { "exclude", required_argument, 0,
- OPT_EXCLUDE },
{ "block-compress", no_argument, 0,
OPT_BLOCK_COMPRESS },
{ "norecurse", no_argument, 0,
@@ -772,11 +775,18 @@
int Oflag = 0;
int nincfiles = 0;
int incfiles_max = 0;
+ int nexfiles = 0;
+ int exfiles_max = 0;
struct incfile {
char *file;
char *dir;
};
+ struct exfile {
+ char *file;
+ char *dir;
+ };
struct incfile *incfiles = NULL;
+ struct exfile *exfiles = NULL;
/*
* Set default values.
@@ -977,11 +987,18 @@
Aflag = 1;
break;
case 'X':
- /*
- * GNU tar compat: exclude the files listed in optarg
- */
- if (tar_gnutar_X_compat(optarg) != 0)
- tar_usage();
+ if (++nexfiles > exfiles_max) {
+ exfiles_max = nexfiles + 3;
+ exfiles = realloc(exfiles,
+ sizeof(*exfiles) * exfiles_max);
+ if (exfiles == NULL) {
+ tty_warn(0, "Unable to allocate space "
+ "for option list");
+ exit(1);
+ }
+ }
+ exfiles[nexfiles - 1].file = optarg;
+ exfiles[nexfiles - 1].dir = chdname;
break;
case 'Z':
/*
@@ -1008,6 +1025,9 @@
case '8':
arcname = DEV_8;
break;
+ case OPT_EXCLUDE:
+ pat_add(optarg, NULL, EXCLUDE);
+ break;
case OPT_ATIME_PRESERVE:
patime = 1;
break;
@@ -1070,9 +1090,10 @@
{
int sawpat = 0;
int dirisnext = 0;
+ int pat_flg;
char *file, *dir;
- while (nincfiles || *argv != NULL) {
+ while (nincfiles || nexfiles || *argv != NULL) {
/*
* If we queued up any include files,
* pull them in now. Otherwise, check
@@ -1085,6 +1106,13 @@
dir = incfiles->dir;
incfiles++;
nincfiles--;
+ pat_flg = INCLUDE;
+ } else if (nexfiles) {
+ file = exfiles->file;
+ dir = exfiles->dir;
+ exfiles++;
+ nexfiles--;
+ pat_flg = EXCLUDE;
} else if (strcmp(*argv, "-I") == 0) {
if (*++argv == NULL)
break;
@@ -1112,7 +1140,7 @@
dirisnext = 1;
continue;
}
- if (pat_add(str, dir) < 0)
+ if (pat_add(str, dir, pat_flg) < 0)
tar_usage();
sawpat = 1;
}
@@ -1129,7 +1157,7 @@
if (*++argv == NULL)
break;
chdname = *argv++;
- } else if (pat_add(*argv++, chdname) < 0)
+ } else if (pat_add(*argv++, chdname, INCLUDE) < 0)
tar_usage();
else
sawpat = 1;
@@ -1498,7 +1526,7 @@
cpio_usage();
}
while ((str = getline(fp)) != NULL) {
- pat_add(str, NULL);
+ pat_add(str, NULL, INCLUDE);
}
fclose(fp);
if (getline_error) {
@@ -1626,7 +1654,7 @@
case LIST:
case EXTRACT:
for (; optind < argc; optind++)
- if (pat_add(argv[optind], 0) < 0)
+ if (pat_add(argv[optind], 0, INCLUDE) < 0)
cpio_usage();
break;
case COPY:
Index: pat_rep.c
===================================================================
RCS file: /cvsroot/basesrc/bin/pax/pat_rep.c,v
retrieving revision 1.16
diff -u -u -r1.16 pat_rep.c
--- pat_rep.c 2002/10/23 19:39:42 1.16
+++ pat_rep.c 2002/12/05 18:44:25
@@ -55,6 +55,7 @@
#include <string.h>
#include <unistd.h>
#include <stdlib.h>
+#include <assert.h>
#ifdef NET2_REGEX
#include <regexp.h>
#else
@@ -72,10 +73,10 @@
*/
#define MAXSUBEXP 10 /* max subexpressions, DO NOT CHANGE */
-static PATTERN *pathead = NULL; /* file pattern match list head */
-static PATTERN *pattail = NULL; /* file pattern match list tail */
-static REPLACE *rephead = NULL; /* replacement string list head */
-static REPLACE *reptail = NULL; /* replacement string list tail */
+static PATTERN *pathead[2]; /* file pattern match list head */
+static PATTERN *pattail[2]; /* file pattern match list tail */
+static REPLACE *rephead; /* replacement string list head */
+static REPLACE *reptail; /* replacement string list tail */
static int rep_name(char *, size_t, int *, int);
static int tty_rename(ARCHD *);
@@ -247,7 +248,7 @@
*/
int
-pat_add(char *str, char *chdn)
+pat_add(char *str, char *chdn, int pat_type)
{
PATTERN *pt;
@@ -275,15 +276,27 @@
pt->fow = NULL;
pt->flgs = 0;
pt->chdname = chdn;
- if (pathead == NULL) {
- pattail = pathead = pt;
+ if (pathead[pat_type] == NULL) {
+ pattail[pat_type] = pathead[pat_type] = pt;
return(0);
}
- pattail->fow = pt;
- pattail = pt;
+ pattail[pat_type]->fow = pt;
+ pattail[pat_type] = pt;
return(0);
}
+void
+invert_patterns(void) /* exchange the INCLUDE and EXCLUDE pattern lists */
+{
+ PATTERN *pt; /* scratch pointer used for both swaps */
+ pt = pathead[INCLUDE]; /* swap the two list heads ... */
+ pathead[INCLUDE] = pathead[EXCLUDE];
+ pathead[EXCLUDE] = pt;
+ pt = pattail[INCLUDE]; /* ... and the two tails, keeping each head paired with its own tail */
+ pattail[INCLUDE] = pattail[EXCLUDE];
+ pattail[EXCLUDE] = pt;
+}
+
/*
* pat_chk()
* complain if any the user supplied pattern did not result in a match to
@@ -300,7 +313,7 @@
* walk down the list checking the flags to make sure MTCH was set,
* if not complain
*/
- for (pt = pathead; pt != NULL; pt = pt->fow) {
+ for (pt = pathead[INCLUDE]; pt != NULL; pt = pt->fow) {
if (pt->flgs & MTCH)
continue;
if (!wban) {
@@ -337,7 +350,7 @@
/*
* if no patterns just return
*/
- if ((pathead == NULL) || ((pt = arcn->pat) == NULL))
+ if ((pathead[INCLUDE] == NULL) || ((pt = arcn->pat) == NULL))
return(0);
/*
@@ -414,8 +427,8 @@
* and the pattern rejects a member (i.e. it matched it) it is done.
* In effect we place the order of the flags as having -c last.
*/
- pt = pathead;
- ppt = &pathead;
+ pt = pathead[INCLUDE];
+ ppt = &pathead[INCLUDE];
while ((pt != NULL) && (pt != arcn->pat)) {
ppt = &(pt->fow);
pt = pt->fow;
@@ -444,29 +457,43 @@
* 0 if this archive member should be processed, 1 if it should be
* skipped and -1 if we are done with all patterns (and pax should quit
* looking for more members)
+ *
+ * when pat_type == INCLUDE
+ * - MATCH means proceed
+ * - NOT_MATCH means skip
+ * when pat_type == EXCLUDE
+ * - MATCH means skip
+ * - NOT_MATCH means proceed
+ * when no pattern
+ * - pat_type == INCLUDE
+ * -> proceed (done if -n was given)
+ * - pat_type == EXCLUDE
+ * -> proceed
*/
int
-pat_match(ARCHD *arcn)
+pat_match(ARCHD *arcn, int pat_type)
{
PATTERN *pt;
+ assert((pat_type == INCLUDE) || (pat_type == EXCLUDE));
+
arcn->pat = NULL;
/*
* if there are no more patterns and we have -n (and not -c) we are
* done. otherwise with no patterns to match, matches all
*/
- if (pathead == NULL) {
- if (nflag && !cflag)
- return(-1);
- return(0);
+ if (pathead[pat_type] == NULL) {
+ if (nflag && pat_type == INCLUDE)
+ return(DONE);
+ return(PROCEED);
}
/*
* have to search down the list one at a time looking for a match.
*/
- pt = pathead;
+ pt = pathead[pat_type];
while (pt != NULL) {
/*
* check for a file name match unless we have DIR_MTCH set in
@@ -487,26 +514,12 @@
pt = pt->fow;
}
- /*
- * return the result, remember that cflag (-c) inverts the sense of a
- * match
- */
- if (pt == NULL)
- return(cflag ? 0 : 1);
-
- /*
- * we had a match, now when we invert the sense (-c) we reject this
- * member. However we have to tag the pattern a being successful, (in a
- * match, not in selecting a archive member) so we call pat_sel() here.
- */
arcn->pat = pt;
- if (!cflag)
- return(0);
-
- if (pat_sel(arcn) < 0)
- return(-1);
- arcn->pat = NULL;
- return(1);
+ if (pt != NULL) {
+ return pat_type == INCLUDE ? SKIP : PROCEED;
+ } else {
+ return pat_type == INCLUDE ? PROCEED : SKIP;
+ }
}
/*
Index: tar.1
===================================================================
RCS file: /cvsroot/basesrc/bin/pax/tar.1,v
retrieving revision 1.6
diff -u -u -r1.6 tar.1
--- tar.1 2002/11/06 14:56:48 1.6
+++ tar.1 2002/12/05 18:44:25
@@ -231,6 +231,8 @@
.It Fl X Ar file , Fl -exclude-from Ar file
Exclude files listed in the given file.
Do not cross mount points in the file system.
+.It Fl -exclude Ar pattern
+Exclude files matching the pattern.
.It Fl Z , -compress , -uncompress
Compress archive using compress.
.It Fl -strict