Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src/usr.bin/gzip Add -l support for xz files
details: https://anonhg.NetBSD.org/src/rev/04be77112c83
branches: trunk
changeset: 433822:04be77112c83
user: martin <martin%NetBSD.org@localhost>
date: Sat Oct 06 16:36:45 2018 +0000
description:
Add -l support for xz files
diffstat:
usr.bin/gzip/gzip.c | 18 ++-
usr.bin/gzip/unxz.c | 323 +++++++++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 334 insertions(+), 7 deletions(-)
diffs (truncated from 402 to 300 lines):
diff -r ae0b4ee0775f -r 04be77112c83 usr.bin/gzip/gzip.c
--- a/usr.bin/gzip/gzip.c Sat Oct 06 16:28:21 2018 +0000
+++ b/usr.bin/gzip/gzip.c Sat Oct 06 16:36:45 2018 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: gzip.c,v 1.113 2018/06/12 00:42:17 kamil Exp $ */
+/* $NetBSD: gzip.c,v 1.114 2018/10/06 16:36:45 martin Exp $ */
/*
* Copyright (c) 1997, 1998, 2003, 2004, 2006, 2008, 2009, 2010, 2011, 2015, 2017
@@ -31,7 +31,7 @@
#ifndef lint
__COPYRIGHT("@(#) Copyright (c) 1997, 1998, 2003, 2004, 2006, 2008,\
2009, 2010, 2011, 2015, 2017 Matthew R. Green. All rights reserved.");
-__RCSID("$NetBSD: gzip.c,v 1.113 2018/06/12 00:42:17 kamil Exp $");
+__RCSID("$NetBSD: gzip.c,v 1.114 2018/10/06 16:36:45 martin Exp $");
#endif /* not lint */
/*
@@ -213,6 +213,7 @@
static const suffixes_t *check_suffix(char *, int);
static ssize_t read_retry(int, void *, size_t);
static ssize_t write_retry(int, const void *, size_t);
+static void print_list_out(off_t, off_t, const char*);
#ifdef SMALL
#define infile_set(f,t) infile_set(f)
@@ -256,6 +257,7 @@
#ifndef NO_XZ_SUPPORT
static off_t unxz(int, int, char *, size_t, off_t *);
+static off_t unxz_len(int);
#endif
#ifdef SMALL
@@ -1579,10 +1581,10 @@
#ifndef NO_XZ_SUPPORT
case FT_XZ:
if (lflag) {
- maybe_warnx("no -l with xz files");
- goto lose;
+ size = unxz_len(fd);
+ print_list_out(in_size, size, file);
+ return -1;
}
-
size = unxz(fd, zfd, NULL, 0, NULL);
break;
#endif
@@ -2147,6 +2149,12 @@
in_tot += in;
out_tot += out;
#endif
+ print_list_out(out, in, outfile);
+}
+
+static void
+print_list_out(off_t out, off_t in, const char *outfile)
+{
printf("%12llu %12llu ", (unsigned long long)out, (unsigned long long)in);
print_ratio(in, out, stdout);
printf(" %s\n", outfile);
diff -r ae0b4ee0775f -r 04be77112c83 usr.bin/gzip/unxz.c
--- a/usr.bin/gzip/unxz.c Sat Oct 06 16:28:21 2018 +0000
+++ b/usr.bin/gzip/unxz.c Sat Oct 06 16:36:45 2018 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: unxz.c,v 1.7 2017/08/04 07:27:08 mrg Exp $ */
+/* $NetBSD: unxz.c,v 1.8 2018/10/06 16:36:45 martin Exp $ */
/*-
* Copyright (c) 2011 The NetBSD Foundation, Inc.
@@ -29,7 +29,7 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/cdefs.h>
-__RCSID("$NetBSD: unxz.c,v 1.7 2017/08/04 07:27:08 mrg Exp $");
+__RCSID("$NetBSD: unxz.c,v 1.8 2018/10/06 16:36:45 martin Exp $");
#include <stdarg.h>
#include <errno.h>
@@ -154,3 +154,322 @@
}
}
}
+
+#include <stdbool.h>
+
+/*
+ * Copied various bits and pieces from xz support code or brute force
+ * replacements.
+ */
+
+#define my_min(A,B) ((A)<(B)?(A):(B))
+
+// Some systems have a suboptimal BUFSIZ. Use a somewhat larger value on them.
+// IO_BUFFER_SIZE must also be a multiple of 8 (sizeof(uint64_t)).
+#if BUFSIZ <= 1024
+# define IO_BUFFER_SIZE 8192
+#else
+# define IO_BUFFER_SIZE (BUFSIZ & ~7U)
+#endif
+
+/// is_sparse() accesses the buffer as uint64_t for maximum speed.
+/// Use a union to make sure that the buffer is properly aligned.
+/// The u32 view is what parse_indexes() indexes when it scans the
+/// Stream Footer / Stream Padding words.
+typedef union {
+ uint8_t u8[IO_BUFFER_SIZE];
+ uint32_t u32[IO_BUFFER_SIZE / sizeof(uint32_t)];
+ uint64_t u64[IO_BUFFER_SIZE / sizeof(uint64_t)];
+} io_buf;
+
+
+/*
+ * Read exactly `size' bytes from `fd', starting at absolute file offset
+ * `pos', into `buf'.
+ *
+ * The file offset of `fd' is changed as a side effect.  The caller must
+ * make sure that `size' does not exceed IO_BUFFER_SIZE.
+ *
+ * Returns false on success.  Returns true if the seek fails, if read()
+ * reports an error other than EINTR, or if end of file is reached before
+ * `size' bytes have been read.
+ */
+static bool
+io_pread(int fd, io_buf *buf, size_t size, off_t pos)
+{
+	// Using lseek() and read() is more portable than pread() and
+	// for us it is as good as real pread().
+	if (lseek(fd, pos, SEEK_SET) != pos)
+		return true;
+
+	uint8_t *dst = buf->u8;
+	size_t left = size;
+
+	// read() may legitimately return fewer bytes than requested, so
+	// keep going until the whole request has been satisfied.
+	while (left > 0) {
+		const ssize_t amount = read(fd, dst, left);
+
+		if (amount < 0) {
+			// An interrupted call is not a failure; retry it.
+			if (errno == EINTR)
+				continue;
+			return true;
+		}
+
+		// End of file before the requested amount was read.
+		if (amount == 0)
+			return true;
+
+		dst += amount;
+		left -= (size_t)amount;
+	}
+
+	return false;
+}
+
+/*
+ * Most of the following is copied (mostly verbatim) from the xz
+ * distribution, from file src/xz/list.c
+ */
+
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file list.c
+/// \brief Listing information about .xz files
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+
+/// Information about a .xz file
+typedef struct {
+ /// Combined Index of all Streams in the file
+ lzma_index *idx;
+
+ /// Total amount of Stream Padding
+ uint64_t stream_padding;
+
+ /// Highest memory usage so far
+ uint64_t memusage_max;
+
+ /// True if all Blocks so far have Compressed Size and
+ /// Uncompressed Size fields
+ bool all_have_sizes;
+
+ /// Oldest XZ Utils version that will decompress the file
+ uint32_t min_version;
+
+} xz_file_info;
+
+/// Positional initializer for xz_file_info, in member order:
+/// idx = NULL, stream_padding = 0, memusage_max = 0,
+/// all_have_sizes = true, min_version = 50000002.
+/// NOTE(review): 50000002 presumably encodes a liblzma version number
+/// (5.0.0 stable) -- confirm against LZMA_VERSION in <lzma/version.h>.
+#define XZ_FILE_INFO_INIT { NULL, 0, 0, true, 50000002 }
+
+
+/// \brief Parse the Index(es) from the given .xz file
+///
+/// \param xfi Pointer to structure where the decoded information
+/// is stored.
+/// \param pair Input file
+///
+/// \return On success, false is returned. On error, true is returned.
+///
+// TODO: This function is pretty big. liblzma should have a function that
+// takes a callback function to parse the Index(es) from a .xz file to make
+// it easy for applications.
+static bool
+parse_indexes(xz_file_info *xfi, int src_fd)
+{
+ struct stat st;
+
+ fstat(src_fd, &st);
+ if (st.st_size <= 0) {
+ return true;
+ }
+
+ if (st.st_size < 2 * LZMA_STREAM_HEADER_SIZE) {
+ return true;
+ }
+
+ io_buf buf;
+ lzma_stream_flags header_flags;
+ lzma_stream_flags footer_flags;
+ lzma_ret ret;
+
+ // lzma_stream for the Index decoder
+ lzma_stream strm = LZMA_STREAM_INIT;
+
+ // All Indexes decoded so far
+ lzma_index *combined_index = NULL;
+
+ // The Index currently being decoded
+ lzma_index *this_index = NULL;
+
+ // Current position in the file. We parse the file backwards so
+ // initialize it to point to the end of the file.
+ off_t pos = st.st_size;
+
+ // Each loop iteration decodes one Index.
+ do {
+ // Check that there is enough data left to contain at least
+ // the Stream Header and Stream Footer. This check cannot
+ // fail in the first pass of this loop.
+ if (pos < 2 * LZMA_STREAM_HEADER_SIZE) {
+ goto error;
+ }
+
+ pos -= LZMA_STREAM_HEADER_SIZE;
+ lzma_vli stream_padding = 0;
+
+ // Locate the Stream Footer. There may be Stream Padding which
+ // we must skip when reading backwards.
+ while (true) {
+ if (pos < LZMA_STREAM_HEADER_SIZE) {
+ goto error;
+ }
+
+ if (io_pread(src_fd, &buf,
+ LZMA_STREAM_HEADER_SIZE, pos))
+ goto error;
+
+ // Stream Padding is always a multiple of four bytes.
+ int i = 2;
+ if (buf.u32[i] != 0)
+ break;
+
+ // To avoid calling io_pread() for every four bytes
+ // of Stream Padding, take advantage that we read
+ // 12 bytes (LZMA_STREAM_HEADER_SIZE) already and
+ // check them too before calling io_pread() again.
+ do {
+ stream_padding += 4;
+ pos -= 4;
+ --i;
+ } while (i >= 0 && buf.u32[i] == 0);
+ }
+
+ // Decode the Stream Footer.
+ ret = lzma_stream_footer_decode(&footer_flags, buf.u8);
+ if (ret != LZMA_OK) {
+ goto error;
+ }
+
+ // Check that the Stream Footer doesn't specify something
+ // that we don't support. This can only happen if the xz
+ // version is older than liblzma and liblzma supports
+ // something new.
+ //
+ // It is enough to check Stream Footer. Stream Header must
+ // match when it is compared against Stream Footer with
+ // lzma_stream_flags_compare().
+ if (footer_flags.version != 0) {
+ goto error;
+ }
+
+ // Check that the size of the Index field looks sane.
+ lzma_vli index_size = footer_flags.backward_size;
+ if ((lzma_vli)(pos) < index_size + LZMA_STREAM_HEADER_SIZE) {
+ goto error;
+ }
+
+ // Set pos to the beginning of the Index.
+ pos -= index_size;
+
+ // Decode the Index.
+ ret = lzma_index_decoder(&strm, &this_index, UINT64_MAX);
+ if (ret != LZMA_OK) {
+ goto error;
+ }
+
+ do {
+ // Don't give the decoder more input than the
+ // Index size.
+ strm.avail_in = my_min(IO_BUFFER_SIZE, index_size);
+ if (io_pread(src_fd, &buf, strm.avail_in, pos))
+ goto error;
+
+ pos += strm.avail_in;
+ index_size -= strm.avail_in;
+
+ strm.next_in = buf.u8;
+ ret = lzma_code(&strm, LZMA_RUN);
+
+ } while (ret == LZMA_OK);
Home |
Main Index |
Thread Index |
Old Index