tech-userlevel archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
(De-)Compression support for makemandb
Hi all,
The attached patch allows makemandb to process compressed man pages.
All the normal formats (compress, gzip, bzip2, xz) are handled.
Joerg
Index: Makefile
===================================================================
RCS file: /home/joerg/repo/netbsd/src/usr.sbin/makemandb/Makefile,v
retrieving revision 1.1
diff -u -p -r1.1 Makefile
--- Makefile 7 Feb 2012 19:13:32 -0000 1.1
+++ Makefile 15 Feb 2012 14:20:45 -0000
@@ -22,8 +22,8 @@ CPPFLAGS+=-I${MDIST} -I${.OBJDIR}
MDOCMLOBJDIR!= cd ${MDOCDIR}/lib/libmandoc && ${PRINTOBJDIR}
MDOCMLLIB= ${MDOCMLOBJDIR}/libmandoc.a
-DPADD.makemandb+= ${MDOCMLLIB}
-LDADD.makemandb+= -L${MDOCMLOBJDIR} -lmandoc
+DPADD.makemandb+= ${MDOCMLLIB} ${LIBARCHIVE} ${LIBBZ2} ${LIBLZMA}
+LDADD.makemandb+= -L${MDOCMLOBJDIR} -lmandoc -larchive -lbz2 -llzma
DPADD+= ${LIBSQLITE3} ${LIBM} ${LIBZ} ${LIBUTIL}
LDADD+= -lsqlite3 -lm -lz -lutil
Index: makemandb.c
===================================================================
RCS file: /home/joerg/repo/netbsd/src/usr.sbin/makemandb/makemandb.c,v
retrieving revision 1.2
diff -u -p -r1.2 makemandb.c
--- makemandb.c 7 Feb 2012 19:17:16 -0000 1.2
+++ makemandb.c 15 Feb 2012 15:42:06 -0000
@@ -26,6 +26,7 @@ __RCSID("$NetBSD: makemandb.c,v 1.2 2012
#include <ctype.h>
#include <dirent.h>
#include <err.h>
+#include <archive.h>
#include <md5.h>
#include <stdio.h>
#include <stdlib.h>
@@ -93,12 +94,13 @@ typedef struct mandb_rec {
static void append(secbuff *sbuff, const char *src);
static void init_secbuffs(mandb_rec *);
static void free_secbuffs(mandb_rec *);
-static int check_md5(const char *, sqlite3 *, const char *, char **);
+static int check_md5(const char *, sqlite3 *, const char *, char **, void *,
size_t);
static void cleanup(mandb_rec *);
static void set_section(const struct mdoc *, const struct man *, mandb_rec *);
static void set_machine(const struct mdoc *, mandb_rec *);
static int insert_into_db(sqlite3 *, mandb_rec *);
-static void begin_parse(const char *, struct mparse *, mandb_rec *);
+static void begin_parse(const char *, struct mparse *, mandb_rec *,
+ const void *, size_t len);
static void pmdoc_node(const struct mdoc_node *, mandb_rec *);
static void pmdoc_Nm(const struct mdoc_node *, mandb_rec *);
static void pmdoc_Nd(const struct mdoc_node *, mandb_rec *);
@@ -587,6 +589,59 @@ update_existing_entry(sqlite3 *db, const
sqlite3_finalize(inner_stmt);
}
+/* read_and_decompress --
+ *	Reads the given file into memory. If it is compressed, decompress
+ *	it before returning to the caller.
+ *
+ *	On success returns 0, stores a heap buffer in *buf (owned by the
+ *	caller, release it with free()) and the number of valid bytes in
+ *	*len. On failure prints a warning, sets *buf to NULL and returns
+ *	-1; *len is meaningless in that case.
+ */
+static int
+read_and_decompress(const char *file, void **buf, size_t *len)
+{
+	size_t off;
+	ssize_t r;
+	struct archive *a;
+	struct archive_entry *ae;
+
+	if ((a = archive_read_new()) == NULL)
+		errx(EXIT_FAILURE, "memory allocation failed");
+
+	/* Make sure no error path hands a stale pointer back. */
+	*buf = NULL;
+
+	if (archive_read_support_compression_all(a) != ARCHIVE_OK ||
+	    archive_read_support_format_raw(a) != ARCHIVE_OK ||
+	    archive_read_open_filename(a, file, 65536) != ARCHIVE_OK ||
+	    archive_read_next_header(a, &ae) != ARCHIVE_OK)
+		goto archive_error;
+
+	*len = 65536;
+	*buf = emalloc(*len);
+	off = 0;
+	for (;;) {
+		r = archive_read_data(a, (char *)*buf + off, *len - off);
+		if (r == 0) {
+			/* End of data: report only the bytes actually read. */
+			archive_read_finish(a);
+			*len = off;
+			return 0;
+		}
+		if (r < 0)
+			goto read_error;
+		off += r;
+		if (off == *len) {
+			/* Buffer full: double it, watching for size_t wrap. */
+			*len *= 2;
+			if (*len < off) {
+				warnx("File too large: %s", file);
+				free(*buf);
+				*buf = NULL;
+				archive_read_finish(a);
+				return -1;
+			}
+			*buf = erealloc(*buf, *len);
+		}
+	}
+
+read_error:
+	free(*buf);
+	*buf = NULL;
+archive_error:
+	/* Fetch the error text before the handle is destroyed. */
+	warnx("Error while reading `%s': %s", file, archive_error_string(a));
+	/*
+	 * archive_read_finish() closes the archive if necessary and frees
+	 * the handle; archive_read_close() alone would leak one
+	 * struct archive per file. (libarchive 3.x calls this
+	 * archive_read_free().)
+	 */
+	archive_read_finish(a);
+	return -1;
+}
+
+
/* update_db --
* Does an incremental updation of the database by checking the file_cache.
* It parses and adds the pages which are present in file_cache,
@@ -601,7 +656,9 @@ update_db(sqlite3 *db, struct mparse *mp
sqlite3_stmt *stmt = NULL;
const char *file;
char *errmsg = NULL;
- char *buf = NULL;
+ char *md5sum;
+ void *buf;
+ size_t buflen;
int new_count = 0; /* Counter for newly indexed/updated pages */
int total_count = 0; /* Counter for total number of pages */
int err_count = 0; /* Counter for number of failed pages */
@@ -619,14 +676,21 @@ update_db(sqlite3 *db, struct mparse *mp
errx(EXIT_FAILURE, "Could not query file cache");
}
+ buf = NULL;
while (sqlite3_step(stmt) == SQLITE_ROW) {
+ free(buf);
total_count++;
rec->device = sqlite3_column_int64(stmt, 0);
rec->inode = sqlite3_column_int64(stmt, 1);
rec->mtime = sqlite3_column_int64(stmt, 2);
file = (const char *) sqlite3_column_text(stmt, 3);
- md5_status = check_md5(file, db, "mandb_meta", &buf);
- assert(buf != NULL);
+ if (read_and_decompress(file, &buf, &buflen)) {
+ err_count++;
+ buf = NULL;
+ continue;
+ }
+ md5_status = check_md5(file, db, "mandb_meta", &md5sum, buf,
buflen);
+ assert(md5sum != NULL);
if (md5_status == -1) {
warnx("An error occurred in checking md5 value"
" for file %s", file);
@@ -642,13 +706,13 @@ update_db(sqlite3 *db, struct mparse *mp
struct stat sb;
stat(file, &sb);
if (S_ISLNK(sb.st_mode)) {
- free(buf);
+ free(md5sum);
link_count++;
continue;
}
- update_existing_entry(db, file, buf, rec,
+ update_existing_entry(db, file, md5sum, rec,
&new_count, &link_count, &err_count);
- free(buf);
+ free(md5sum);
continue;
}
@@ -660,10 +724,10 @@ update_db(sqlite3 *db, struct mparse *mp
*/
if (mflags.verbosity > 1)
printf("Parsing: %s\n", file);
- rec->md5_hash = buf;
+ rec->md5_hash = md5sum;
rec->file_path = estrdup(file);
// file_path is freed by insert_into_db itself.
- begin_parse(file, mp, rec);
+ begin_parse(file, mp, rec, buf, buflen);
if (insert_into_db(db, rec) < 0) {
warnx("Error in indexing %s", file);
err_count++;
@@ -672,7 +736,8 @@ update_db(sqlite3 *db, struct mparse *mp
}
}
}
-
+ free(buf);
+
sqlite3_finalize(stmt);
if (mflags.verbosity) {
@@ -711,7 +776,8 @@ update_db(sqlite3 *db, struct mparse *mp
* parses the man page using libmandoc
*/
static void
-begin_parse(const char *file, struct mparse *mp, mandb_rec *rec)
+begin_parse(const char *file, struct mparse *mp, mandb_rec *rec,
+ const void *buf, size_t len)
{
struct mdoc *mdoc;
struct man *man;
@@ -719,7 +785,7 @@ begin_parse(const char *file, struct mpa
rec->xr_found = 0;
- if (mparse_readfd(mp, -1, file) >= MANDOCLEVEL_FATAL) {
+ if (mparse_readmem(mp, buf, len, file) >= MANDOCLEVEL_FATAL) {
warnx("%s: Parse failure", file);
return;
}
@@ -1675,7 +1741,8 @@ insert_into_db(sqlite3 *db, mandb_rec *r
* 1: If the hash exists in the database.
*/
static int
-check_md5(const char *file, sqlite3 *db, const char *table, char **buf)
+check_md5(const char *file, sqlite3 *db, const char *table, char **md5sum,
+ void *buf, size_t buflen)
{
int rc = 0;
int idx = -1;
@@ -1683,8 +1750,8 @@ check_md5(const char *file, sqlite3 *db,
sqlite3_stmt *stmt = NULL;
assert(file != NULL);
- *buf = MD5File(file, NULL);
- if (*buf == NULL) {
+ *md5sum = MD5Data(buf, buflen, NULL);
+ if (*md5sum == NULL) {
warn("md5 failed: %s", file);
return -1;
}
@@ -1694,19 +1761,19 @@ check_md5(const char *file, sqlite3 *db,
rc = sqlite3_prepare_v2(db, sqlstr, -1, &stmt, NULL);
if (rc != SQLITE_OK) {
free(sqlstr);
- free(*buf);
- *buf = NULL;
+ free(*md5sum);
+ *md5sum = NULL;
return -1;
}
idx = sqlite3_bind_parameter_index(stmt, ":md5_hash");
- rc = sqlite3_bind_text(stmt, idx, *buf, -1, NULL);
+ rc = sqlite3_bind_text(stmt, idx, *md5sum, -1, NULL);
if (rc != SQLITE_OK) {
warnx("%s", sqlite3_errmsg(db));
sqlite3_finalize(stmt);
free(sqlstr);
- free(*buf);
- *buf = NULL;
+ free(*md5sum);
+ *md5sum = NULL;
return -1;
}
Index: mandoc.h
===================================================================
RCS file: /home/joerg/repo/netbsd/src/external/bsd/mdocml/dist/mandoc.h,v
retrieving revision 1.1.1.9
diff -u -p -r1.1.1.9 mandoc.h
--- mandoc.h 30 Jan 2012 16:44:19 -0000 1.1.1.9
+++ mandoc.h 15 Feb 2012 13:54:00 -0000
@@ -418,6 +418,8 @@ struct mparse *mparse_alloc(enum mparse
void mparse_free(struct mparse *);
void mparse_keep(struct mparse *);
enum mandoclevel mparse_readfd(struct mparse *, int, const char *);
+enum mandoclevel mparse_readmem(struct mparse *, const void *, size_t,
+ const char *);
void mparse_reset(struct mparse *);
void mparse_result(struct mparse *,
struct mdoc **, struct man **);
Index: read.c
===================================================================
RCS file: /home/joerg/repo/netbsd/src/external/bsd/mdocml/dist/read.c,v
retrieving revision 1.5
diff -u -p -r1.5 read.c
--- read.c 6 Feb 2012 10:42:44 -0000 1.5
+++ read.c 15 Feb 2012 13:55:55 -0000
@@ -28,6 +28,7 @@
#include <ctype.h>
#include <fcntl.h>
#include <stdarg.h>
+#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -37,6 +38,7 @@
#include "libmandoc.h"
#include "mdoc.h"
#include "man.h"
+#include "main.h"
#ifndef MAP_FILE
#define MAP_FILE 0
@@ -70,7 +72,6 @@ static void resize_buf(struct buf *, s
static void mparse_buf_r(struct mparse *, struct buf, int);
static void mparse_readfd_r(struct mparse *, int, const char *, int);
static void pset(const char *, int, struct mparse *);
-static void pdesc(struct mparse *, const char *, int);
static int read_whole_file(const char *, int, struct buf *, int *);
static void mparse_end(struct mparse *);
@@ -547,38 +548,6 @@ rerun:
free(ln.buf);
}
-static void
-pdesc(struct mparse *curp, const char *file, int fd)
-{
- struct buf blk;
- int with_mmap;
-
- /*
- * Run for each opened file; may be called more than once for
- * each full parse sequence if the opened file is nested (i.e.,
- * from `so'). Simply sucks in the whole file and moves into
- * the parse phase for the file.
- */
-
- if ( ! read_whole_file(file, fd, &blk, &with_mmap)) {
- curp->file_status = MANDOCLEVEL_SYSERR;
- return;
- }
-
- /* Line number is per-file. */
-
- curp->line = 1;
-
- mparse_buf_r(curp, blk, 1);
-
-#ifdef HAVE_MMAP
- if (with_mmap)
- munmap(blk.buf, blk.sz);
- else
-#endif
- free(blk.buf);
-}
-
static int
read_whole_file(const char *file, int fd, struct buf *fb, int *with_mmap)
{
@@ -674,29 +643,72 @@ mparse_end(struct mparse *curp)
}
static void
-mparse_readfd_r(struct mparse *curp, int fd, const char *file, int re)
+mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file,
+ int re)
{
const char *svfile;
+ /* Line number is per-file. */
+ svfile = curp->file;
+ curp->file = file;
+ curp->line = 1;
+
+ mparse_buf_r(curp, blk, 1);
+
+ if (0 == re && MANDOCLEVEL_FATAL > curp->file_status)
+ mparse_end(curp);
+
+ curp->file = svfile;
+}
+
+enum mandoclevel /* parse an in-memory page; result is curp->file_status after the parse */
+mparse_readmem(struct mparse *curp, const void *buf, size_t len,
+ const char *file) /* file becomes curp->file for the duration of the parse */
+{
+ struct buf blk;
+
+ blk.buf = UNCONST(buf); /* wraps the caller's memory without copying; presumably const is dropped only because struct buf's pointer is non-const */
+ blk.sz = len;
+
+ mparse_parse_buffer(curp, blk, file, 0); /* re == 0: mparse_end() runs unless the status reached MANDOCLEVEL_FATAL */
+ return(curp->file_status);
+}
+
+static void
+mparse_readfd_r(struct mparse *curp, int fd, const char *file, int re)
+{
+ struct buf blk;
+ int with_mmap;
+
if (-1 == fd)
if (-1 == (fd = open(file, O_RDONLY, 0))) {
perror(file);
curp->file_status = MANDOCLEVEL_SYSERR;
return;
}
+ /*
+ * Run for each opened file; may be called more than once for
+ * each full parse sequence if the opened file is nested (i.e.,
+ * from `so'). Simply sucks in the whole file and moves into
+ * the parse phase for the file.
+ */
- svfile = curp->file;
- curp->file = file;
+ if ( ! read_whole_file(file, fd, &blk, &with_mmap)) {
+ curp->file_status = MANDOCLEVEL_SYSERR;
+ return;
+ }
- pdesc(curp, file, fd);
+ mparse_parse_buffer(curp, blk, file, re);
- if (0 == re && MANDOCLEVEL_FATAL > curp->file_status)
- mparse_end(curp);
+#ifdef HAVE_MMAP
+ if (with_mmap)
+ munmap(blk.buf, blk.sz);
+ else
+#endif
+ free(blk.buf);
if (STDIN_FILENO != fd && -1 == close(fd))
perror(file);
-
- curp->file = svfile;
}
enum mandoclevel
Home |
Main Index |
Thread Index |
Old Index