Subject: tail(1) fix for large files
To: None <tech-userlevel@netbsd.org>
From: Michael Graff <explorer@flame.org>
List: tech-userlevel
Date: 11/12/2001 17:11:39
Here's a patch I made to the -current version of tail(1), which
gets around a long-standing and damned annoying problem that
tail won't work on files greater than 2 GB, and sometimes less.
The old "tail -f" and "tail -1234" formats would mmap() the file
and search backwards. The problem is, if the file is too big,
the mmap() fails.
The change I made was to walk backwards in 10 MB chunks, unmapping
and remapping as needed, until either the front of the file is
reached or the requested number of lines has been found.
I should probably walk the file again in the forward direction,
using an mmap(), print, munmap() method, but I don't. I let the
caller deal with that -- it uses a rather bad loop of fgetc()
and fputc() to print out the remainder of the file. In practice, it
probably doesn't matter, since 10 MB is large, and the file _is_
output, just less efficiently than it could be.
If I can get a few others to sanity check these diffs, I'll commit
them ASAP. I wrote them on little sleep, so I could have missed some
edge condition (off-by-one errors get me every damned time).
--Michael
Index: forward.c
===================================================================
RCS file: /cvsroot/basesrc/usr.bin/tail/forward.c,v
retrieving revision 1.16
diff -u -r1.16 forward.c
--- forward.c 1999/07/21 06:38:49 1.16
+++ forward.c 2001/11/13 01:02:07
@@ -249,40 +249,79 @@
long off;
struct stat *sbp;
{
- off_t size;
+ off_t file_size;
+ off_t file_remaining;
char *p;
char *start;
+ off_t mmap_size;
+ off_t mmap_offset;
+ off_t mmap_remaining;
- if (!(size = sbp->st_size))
+#define MMAP_MAXSIZE (10 * 1024 * 1024)
+
+ if (!(file_size = sbp->st_size))
return (0);
+ file_remaining = file_size;
- if (size > SIZE_T_MAX) {
- err(0, "%s: %s", fname, strerror(EFBIG));
- return (1);
+ if (mmap_size > MMAP_MAXSIZE) {
+ mmap_size = MMAP_MAXSIZE;
+ mmap_offset = file_size - MMAP_MAXSIZE;
+ } else {
+ mmap_size = file_size;
+ mmap_offset = 0;
}
- if ((start = mmap(NULL, (size_t)size, PROT_READ,
- MAP_FILE|MAP_SHARED, fileno(fp), (off_t)0)) == (caddr_t)-1) {
- err(0, "%s: %s", fname, strerror(EFBIG));
- return (1);
- }
+ while (off) {
+ start = mmap(NULL, (size_t)mmap_size, PROT_READ,
+ MAP_FILE|MAP_SHARED, fileno(fp), mmap_offset);
+ if (start == MAP_FAILED) {
+ err(0, "%s: %s", fname, strerror(EFBIG));
+ return (1);
+ }
+
+ mmap_remaining = mmap_size;
+ /* Last char is special, ignore whether newline or not. */
+ for (p = start + mmap_remaining - 1 ; --mmap_remaining ; )
+ if (*--p == '\n' && !--off) {
+ ++p;
+ break;
+ }
- /* Last char is special, ignore whether newline or not. */
- for (p = start + size - 1; --size;)
- if (*--p == '\n' && !--off) {
- ++p;
+ file_remaining -= mmap_size - mmap_remaining;
+
+ if (off == 0)
break;
+
+ if (munmap(start, mmap_size)) {
+ err(0, "%s: %s", fname, strerror(errno));
+ return (1);
}
- /* Set the file pointer to reflect the length displayed. */
- size = sbp->st_size - size;
- WR(p, size);
- if (fseek(fp, (long)sbp->st_size, SEEK_SET) == -1) {
- ierr();
- return (1);
+ if (mmap_offset >= MMAP_MAXSIZE) {
+ mmap_offset -= MMAP_MAXSIZE;
+ } else {
+ mmap_offset = 0;
+ mmap_size = file_remaining;
+ }
}
- if (munmap(start, (size_t)sbp->st_size)) {
+
+ /*
+ * Output the (perhaps partial) data in this mmap'd block.
+ */
+ WR(p, mmap_size - mmap_remaining);
+ file_remaining += mmap_size - mmap_remaining;
+ if (munmap(start, mmap_size)) {
err(0, "%s: %s", fname, strerror(errno));
+ return (1);
+ }
+
+ /*
+ * Set the file pointer to reflect the length displayed.
+ * This will cause the caller to redisplay the data if/when
+ * needed.
+ */
+ if (fseeko(fp, file_remaining, SEEK_SET) == -1) {
+ ierr();
return (1);
}
return (0);