Subject: Re: tail(1) fix for large files
To: Michael Graff <explorer@flame.org>
From: James Chacon <jchacon@genuity.net>
List: tech-userlevel
Date: 11/14/2001 14:49:36
Looks good to me
James
>
>
>Here's a patch I made to the -current version of tail(1), which
>gets around a long-standing and damned annoying problem: tail
>won't work on files larger than 2 GB, and sometimes on smaller ones.
>
>The old "tail -f" and "tail -1234" formats would mmap() the file
>and search backwards. The problem is, if the file is too big,
>the mmap() fails.
>
>The change I made was to walk backwards in 10 MB chunks, unmapping
>and remapping as needed, until either the front of the file is reached
>or the correct number of lines has been found.
>
>I should probably walk the file again in the forward direction,
>using a mmap(), print, munmap() method, but I don't. I let the
>caller deal with that -- it uses a rather bad loop of fgetc()
>and fputc() to print out the remainder of the file. In practice, it
>probably doesn't matter, since 10 MB is large, and the file _is_
>output, just less efficiently than it could be.
>
>If I can get a few others to sanity check these diffs, I'll commit
>them ASAP. I did them on little sleep, so I could have missed some sort
>of edge condition (off by one will get me every damned time).
>
>--Michael
>
>Index: forward.c
>===================================================================
>RCS file: /cvsroot/basesrc/usr.bin/tail/forward.c,v
>retrieving revision 1.16
>diff -u -r1.16 forward.c
>--- forward.c 1999/07/21 06:38:49 1.16
>+++ forward.c 2001/11/13 01:02:07
>@@ -249,40 +249,79 @@
> long off;
> struct stat *sbp;
> {
>- off_t size;
>+ off_t file_size;
>+ off_t file_remaining;
> char *p;
> char *start;
>+ off_t mmap_size;
>+ off_t mmap_offset;
>+ off_t mmap_remaining;
>
>- if (!(size = sbp->st_size))
>+#define MMAP_MAXSIZE (10 * 1024 * 1024)
>+
>+ if (!(file_size = sbp->st_size))
> return (0);
>+ file_remaining = file_size;
>
>- if (size > SIZE_T_MAX) {
>- err(0, "%s: %s", fname, strerror(EFBIG));
>- return (1);
>+ if (mmap_size > MMAP_MAXSIZE) {
>+ mmap_size = MMAP_MAXSIZE;
>+ mmap_offset = file_size - MMAP_MAXSIZE;
>+ } else {
>+ mmap_size = file_size;
>+ mmap_offset = 0;
> }
>
>- if ((start = mmap(NULL, (size_t)size, PROT_READ,
>- MAP_FILE|MAP_SHARED, fileno(fp), (off_t)0)) == (caddr_t)-1) {
>- err(0, "%s: %s", fname, strerror(EFBIG));
>- return (1);
>- }
>+ while (off) {
>+ start = mmap(NULL, (size_t)mmap_size, PROT_READ,
>+ MAP_FILE|MAP_SHARED, fileno(fp), mmap_offset);
>+ if (start == MAP_FAILED) {
>+ err(0, "%s: %s", fname, strerror(EFBIG));
>+ return (1);
>+ }
>+
>+ mmap_remaining = mmap_size;
>+ /* Last char is special, ignore whether newline or not. */
>+ for (p = start + mmap_remaining - 1 ; --mmap_remaining ; )
>+ if (*--p == '\n' && !--off) {
>+ ++p;
>+ break;
>+ }
>
>- /* Last char is special, ignore whether newline or not. */
>- for (p = start + size - 1; --size;)
>- if (*--p == '\n' && !--off) {
>- ++p;
>+ file_remaining -= mmap_size - mmap_remaining;
>+
>+ if (off == 0)
> break;
>+
>+ if (munmap(start, mmap_size)) {
>+ err(0, "%s: %s", fname, strerror(errno));
>+ return (1);
> }
>
>- /* Set the file pointer to reflect the length displayed. */
>- size = sbp->st_size - size;
>- WR(p, size);
>- if (fseek(fp, (long)sbp->st_size, SEEK_SET) == -1) {
>- ierr();
>- return (1);
>+ if (mmap_offset >= MMAP_MAXSIZE) {
>+ mmap_offset -= MMAP_MAXSIZE;
>+ } else {
>+ mmap_offset = 0;
>+ mmap_size = file_remaining;
>+ }
> }
>- if (munmap(start, (size_t)sbp->st_size)) {
>+
>+ /*
>+ * Output the (perhaps partial) data in this mmap'd block.
>+ */
>+ WR(p, mmap_size - mmap_remaining);
>+ file_remaining += mmap_size - mmap_remaining;
>+ if (munmap(start, mmap_size)) {
> err(0, "%s: %s", fname, strerror(errno));
>+ return (1);
>+ }
>+
>+ /*
>+ * Set the file pointer to reflect the length displayed.
>+ * This will cause the caller to redisplay the data if/when
>+ * needed.
>+ */
>+ if (fseeko(fp, file_remaining, SEEK_SET) == -1) {
>+ ierr();
> return (1);
> }
> return (0);
>
>
>
>