Subject: Bug fixes and enhancements to cp(1)
To: None <current-users@sun-lamp.cs.berkeley.edu>
From: None <Mark_Weaver@brown.edu>
List: current-users
Date: 12/06/1993 01:40:08
I have made some fairly significant improvements to cp(1). Here is
the patch. For a description of the changes, see the updated header
comment in cp.c and the changes to the man page (cp.1).
Mark
--------------------------------------------------------------------
Email: Mark_Weaver@brown.edu | Brown University
PGP Key: finger mhw@cs.brown.edu | Dept of Computer Science
diff -c bin/cp/cp.c.mhw1 bin/cp/cp.c
*** bin/cp/cp.c.mhw1 Wed Nov 10 05:46:26 1993
--- bin/cp/cp.c Mon Dec 6 01:05:29 1993
***************
*** 50,62 ****
*
* The global PATH_T structures "to" and "from" always contain paths to the
* current source and target files, respectively. Since cp does not change
! * directories, these paths can be either absolute or dot-realative.
*
* The basic algorithm is to initialize "to" and "from", and then call the
* recursive copy() function to do the actual work. If "from" is a file,
* copy copies the data. If "from" is a directory, copy creates the
* corresponding "to" directory, and calls itself recursively on all of
* the entries in the "from" directory.
*/
#include <sys/param.h>
--- 50,70 ----
*
* The global PATH_T structures "to" and "from" always contain paths to the
* current source and target files, respectively. Since cp does not change
! * directories, these paths can be either absolute or dot-relative.
*
* The basic algorithm is to initialize "to" and "from", and then call the
* recursive copy() function to do the actual work. If "from" is a file,
* copy copies the data. If "from" is a directory, copy creates the
* corresponding "to" directory, and calls itself recursively on all of
* the entries in the "from" directory.
+ *
+ * 12/6/93: Modifications made by Mark_Weaver@brown.edu
+ * - Option added (-l) to handle hard links properly
+ * - Bug fixed: when overwriting an existing file/directory with
+ * anything other than a normal file or directory, it would
+ * ignore the (-i) flag and unlink the file blindly. If the
+ * euid == 0, this could unlink directories improperly as well.
+ * - Replaced many fprintf() calls with equivalent err() calls.
*/
#include <sys/param.h>
***************
*** 72,77 ****
--- 80,98 ----
#include <string.h>
#include "extern.h"
+ #define HASH_TABLE_SIZE 511
+
+ typedef struct _cp_inode {
+ struct _cp_inode *next;
+ ino_t src_inode;
+ char dest_name[1];
+ } cp_inode;
+
+ static void hash_init __P((void));
+ static void hash_insert __P((ino_t, char *));
+ static cp_inode *hash_find __P((ino_t));
+ static int verify_unlink __P((void));
+ static int maybe_unlink __P((void));
static void copy __P((void));
static void copy_dir __P((void));
static void copy_fifo __P((struct stat *, int));
***************
*** 86,95 ****
uid_t myuid;
int exit_val, myumask;
! int iflag, pflag, orflag, rflag;
int (*statfcn)();
char *progname;
main(argc, argv)
int argc;
char **argv;
--- 107,156 ----
uid_t myuid;
int exit_val, myumask;
! int iflag, pflag, orflag, rflag, lflag;
int (*statfcn)();
char *progname;
+ cp_inode **hash_table;
+
+ static void
+ hash_init()
+ {
+ int i;
+
+ hash_table = (cp_inode **)malloc(HASH_TABLE_SIZE * sizeof(cp_inode *));
+ for (i=0;i<HASH_TABLE_SIZE;i++)
+ hash_table[i] = NULL;
+ }
+
+ static void
+ hash_insert(src, dest)
+ ino_t src;
+ char *dest;
+ {
+ int index;
+ cp_inode *new;
+
+ index = src % HASH_TABLE_SIZE;
+ new = (cp_inode *)malloc(sizeof(cp_inode) + strlen(dest));
+ new->next = hash_table[index];
+ new->src_inode = src;
+ strcpy(new->dest_name, dest);
+ hash_table[index] = new;
+ }
+
+ static cp_inode *
+ hash_find(src)
+ ino_t src;
+ {
+ cp_inode *p;
+
+ for (p = hash_table[src % HASH_TABLE_SIZE] ; p ; p = p->next)
+ if (p->src_inode == src)
+ return p;
+ return NULL;
+ }
+
main(argc, argv)
int argc;
char **argv;
***************
*** 107,113 ****
progname = (p = rindex(*argv,'/')) ? ++p : *argv;
symfollow = 0;
! while ((c = getopt(argc, argv, "Rfhipr")) != EOF) {
switch ((char)c) {
case 'f':
iflag = 0;
--- 168,174 ----
progname = (p = rindex(*argv,'/')) ? ++p : *argv;
symfollow = 0;
! while ((c = getopt(argc, argv, "Rfhlipr")) != EOF) {
switch ((char)c) {
case 'f':
iflag = 0;
***************
*** 115,120 ****
--- 176,184 ----
case 'h':
symfollow = 1;
break;
+ case 'l':
+ lflag = 1;
+ break;
case 'i':
iflag = isatty(fileno(stdin));
break;
***************
*** 140,149 ****
usage();
if (rflag && orflag) {
! (void)fprintf(stderr,
! "cp: the -R and -r options are mutually exclusive.\n");
exit(1);
}
myuid = getuid();
--- 204,215 ----
usage();
if (rflag && orflag) {
! err("the -R and -r options are mutually exclusive.");
exit(1);
}
+
+ if (lflag)
+ hash_init();
myuid = getuid();
***************
*** 208,219 ****
--- 274,314 ----
exit(exit_val);
}
+ static int
+ verify_unlink()
+ {
+ int checkch, ch;
+
+ if (iflag) {
+ (void)fprintf(stderr, "overwrite %s? ", to.p_path);
+ checkch = ch = getchar();
+ while (ch != '\n' && ch != EOF)
+ ch = getchar();
+ if (checkch != 'y' && checkch != 'Y')
+ return 0;
+ }
+ return 1;
+ }
+
+ static int
+ maybe_unlink()
+ {
+ if (!verify_unlink())
+ return 1;
+ if (unlink(to.p_path)) {
+ err("unlink: %s: %s", to.p_path, strerror(errno));
+ return 1;
+ }
+ return 0;
+ }
+
/* copy file or directory at "from" to "to". */
static void
copy()
{
struct stat from_stat, to_stat;
int dne, statval;
+ cp_inode *cpi;
statval = statfcn(from.p_path, &from_stat);
if (statval == -1) {
***************
*** 227,240 ****
else {
if (to_stat.st_dev == from_stat.st_dev &&
to_stat.st_ino == from_stat.st_ino) {
! (void)fprintf(stderr,
! "%s: %s and %s are identical (not copied).\n",
! progname, to.p_path, from.p_path);
exit_val = 1;
return;
}
dne = 0;
}
switch(from_stat.st_mode & S_IFMT) {
case S_IFLNK:
--- 322,355 ----
else {
if (to_stat.st_dev == from_stat.st_dev &&
to_stat.st_ino == from_stat.st_ino) {
! err("%s and %s are identical (not copied).",
! to.p_path, from.p_path);
! exit_val = 1;
! return;
! }
! if ((to_stat.st_mode & S_IFMT) == S_IFDIR &&
! (from_stat.st_mode & S_IFMT) != S_IFDIR) {
! err("directory %s already exists (not removed).",
! to.p_path);
exit_val = 1;
return;
}
dne = 0;
}
+
+ if (lflag) {
+ if (cpi = hash_find(from_stat.st_ino)) {
+ if (!dne && maybe_unlink())
+ return;
+ if (link(cpi->dest_name, to.p_path) == -1) {
+ err("link: %s: %s", to.p_path, strerror(errno));
+ return;
+ }
+ return;
+ }
+ else if(from_stat.st_nlink > 1)
+ hash_insert(from_stat.st_ino, to.p_path);
+ }
switch(from_stat.st_mode & S_IFMT) {
case S_IFLNK:
***************
*** 242,250 ****
return;
case S_IFDIR:
if (!rflag && !orflag) {
! (void)fprintf(stderr,
! "%s: %s is a directory (not copied).\n",
! progname, from.p_path);
exit_val = 1;
return;
}
--- 357,363 ----
return;
case S_IFDIR:
if (!rflag && !orflag) {
! err("%s is a directory (not copied).", from.p_path);
exit_val = 1;
return;
}
***************
*** 263,270 ****
}
}
else if (!S_ISDIR(to_stat.st_mode)) {
! (void)fprintf(stderr, "%s: %s: not a directory.\n",
! progname, to.p_path);
return;
}
copy_dir();
--- 376,382 ----
}
}
else if (!S_ISDIR(to_stat.st_mode)) {
! err("%s: not a directory.", to.p_path);
return;
}
copy_dir();
***************
*** 319,335 ****
* modified by the umask.)
*/
if (!dne) {
! if (iflag) {
! int checkch, ch;
!
! (void)fprintf(stderr, "overwrite %s? ", to.p_path);
! checkch = ch = getchar();
! while (ch != '\n' && ch != EOF)
! ch = getchar();
! if (checkch != 'y' && checkch != 'Y') {
! (void)close(from_fd);
! return;
! }
}
to_fd = open(to.p_path, O_WRONLY|O_TRUNC, 0);
} else
--- 431,439 ----
* modified by the umask.)
*/
if (!dne) {
! if (!verify_unlink()) {
! (void)close(from_fd);
! return;
}
to_fd = open(to.p_path, O_WRONLY|O_TRUNC, 0);
} else
***************
*** 394,401 ****
dir_cnt = scandir(from.p_path, &dir_list, NULL, NULL);
if (dir_cnt == -1) {
! (void)fprintf(stderr, "%s: can't read directory %s.\n",
! progname, from.p_path);
exit_val = 1;
}
--- 498,504 ----
dir_cnt = scandir(from.p_path, &dir_list, NULL, NULL);
if (dir_cnt == -1) {
! err("can't read directory %s.", from.p_path);
exit_val = 1;
}
***************
*** 473,482 ****
return;
}
link[len] = '\0';
! if (exists && unlink(to.p_path)) {
! err("unlink: %s: %s", to.p_path, strerror(errno));
return;
- }
if (symlink(link, to.p_path)) {
err("symlink: %s: %s", link, strerror(errno));
return;
--- 576,583 ----
return;
}
link[len] = '\0';
! if (exists && maybe_unlink())
return;
if (symlink(link, to.p_path)) {
err("symlink: %s: %s", link, strerror(errno));
return;
***************
*** 488,497 ****
struct stat *from_stat;
int exists;
{
! if (exists && unlink(to.p_path)) {
! err("unlink: %s: %s", to.p_path, strerror(errno));
return;
- }
if (mkfifo(to.p_path, from_stat->st_mode)) {
err("mkfifo: %s: %s", to.p_path, strerror(errno));
return;
--- 589,596 ----
struct stat *from_stat;
int exists;
{
! if (exists && maybe_unlink())
return;
if (mkfifo(to.p_path, from_stat->st_mode)) {
err("mkfifo: %s: %s", to.p_path, strerror(errno));
return;
***************
*** 505,514 ****
struct stat *from_stat;
int exists;
{
! if (exists && unlink(to.p_path)) {
! err("unlink: %s: %s", to.p_path, strerror(errno));
return;
- }
if (mknod(to.p_path, from_stat->st_mode, from_stat->st_rdev)) {
err("mknod: %s: %s", to.p_path, strerror(errno));
return;
--- 604,611 ----
struct stat *from_stat;
int exists;
{
! if (exists && maybe_unlink())
return;
if (mknod(to.p_path, from_stat->st_mode, from_stat->st_rdev)) {
err("mknod: %s: %s", to.p_path, strerror(errno));
return;
diff -c bin/cp/cp.1.mhw1 bin/cp/cp.1
*** bin/cp/cp.1.mhw1 Sun Nov 7 02:12:19 1993
--- bin/cp/cp.1 Mon Dec 6 01:38:55 1993
***************
*** 43,52 ****
.Nd copy files
.Sh SYNOPSIS
.Nm cp
! .Op Fl Rfhip
.Ar source_file target_file
.Nm cp
! .Op Fl Rfhip
.Ar source_file ... target_directory
.Sh DESCRIPTION
In the first synopsis form, the
--- 43,52 ----
.Nd copy files
.Sh SYNOPSIS
.Nm cp
! .Op Fl Rfhlip
.Ar source_file target_file
.Nm cp
! .Op Fl Rfhlip
.Ar source_file ... target_directory
.Sh DESCRIPTION
In the first synopsis form, the
***************
*** 95,100 ****
--- 95,106 ----
Provided for the
.Fl R
option which does not follow symbolic links by default.
+ .It Fl l
+ Causes
+ .Nm cp
+ to attempt to handle hard links correctly. A hash table is maintained
+ of the inodes of source files that have more than one link, and if a later
+ source file has an inode already in the table, a hard link to the earlier copy is created.
.It Fl i
Causes
.Nm cp
***************
*** 153,159 ****
.Sh SEE ALSO
.Xr mv 1 ,
.Xr rcp 1 ,
! .Xr umask 2
.Sh HISTORY
The
.Nm cp
--- 159,183 ----
.Sh SEE ALSO
.Xr mv 1 ,
.Xr rcp 1 ,
! .Xr ln 1 ,
! .Xr umask 2 ,
! .Xr link 2
! .Sh BUGS
! When the
! .Fl l
! option is used, and a source file is not copied for any reason
! (for example, an existing file couldn't be overwritten), its inode is
! still recorded in the hash table. If another file is copied later
! with a matching inode, a hard link will be made to the existing
! destination file that was not overwritten, rather than to a copy of the source.
! .Pp
! Also, when the
! .Fl l
! option is used, and directories are copied onto more than one
! destination volume,
! .Nm cp
! may attempt to create some cross-device links, which will obviously
! fail.
.Sh HISTORY
The
.Nm cp
------------------------------------------------------------------------------