pkgsrc-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[pkgsrc/trunk]: pkgsrc/net/libfetch libfetch-2.5:



details:   https://anonhg.NetBSD.org/pkgsrc/rev/a48cac00c671
branches:  trunk
changeset: 541300:a48cac00c671
user:      joerg <joerg%pkgsrc.org@localhost>
date:      Fri Apr 18 21:13:10 2008 +0000

description:
libfetch-2.5:
Add basic index parsing support for HTTP, based on the ftpio.c code in
pkg_install. Permission to use the 3-clause BSD license was granted by
Thomas Klausner in private mail.

diffstat:

 net/libfetch/Makefile     |    4 +-
 net/libfetch/files/http.c |  199 ++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 196 insertions(+), 7 deletions(-)

diffs (239 lines):

diff -r 103878dc914c -r a48cac00c671 net/libfetch/Makefile
--- a/net/libfetch/Makefile     Fri Apr 18 20:44:36 2008 +0000
+++ b/net/libfetch/Makefile     Fri Apr 18 21:13:10 2008 +0000
@@ -1,7 +1,7 @@
-# $NetBSD: Makefile,v 1.7 2008/04/16 01:01:50 joerg Exp $
+# $NetBSD: Makefile,v 1.8 2008/04/18 21:13:10 joerg Exp $
 #
 
-DISTNAME=      libfetch-2.4
+DISTNAME=      libfetch-2.5
 CATEGORIES=    net
 MASTER_SITES=  # empty
 DISTFILES=     # empty
diff -r 103878dc914c -r a48cac00c671 net/libfetch/files/http.c
--- a/net/libfetch/files/http.c Fri Apr 18 20:44:36 2008 +0000
+++ b/net/libfetch/files/http.c Fri Apr 18 21:13:10 2008 +0000
@@ -1,6 +1,8 @@
-/*     $NetBSD: http.c,v 1.13 2008/04/16 15:10:18 joerg Exp $  */
+/*     $NetBSD: http.c,v 1.14 2008/04/18 21:13:10 joerg Exp $  */
 /*-
  * Copyright (c) 2000-2004 Dag-Erling Coïdan Smørgrav
+ * Copyright (c) 2003 Thomas Klausner <wiz%NetBSD.org@localhost>
+ * Copyright (c) 2008 Joerg Sonnenberger <joerg%NetBSD.org@localhost>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -1150,14 +1152,202 @@
        return (0);
 }
 
+enum http_states {     /* States of the HTML index scanner stepped by parse_index() */
+       ST_NONE,        /* plain text, outside any markup */
+       ST_LT,          /* "<" seen, tag name not yet started */
+       ST_LTA,         /* "<a" seen */
+       ST_TAGA,        /* inside an a-tag, between attributes */
+       ST_H,           /* "h" seen at the start of an attribute name in an a-tag */
+       ST_R,           /* "hr" seen */
+       ST_E,           /* "hre" seen */
+       ST_F,           /* "href" seen, waiting for "=" */
+       ST_HREF,        /* "href=" seen, waiting for the opening double quote */
+       ST_HREFQ,       /* inside the double-quoted href value */
+       ST_TAG,         /* inside a tag that is not an a-tag; skipped up to ">" */
+       ST_TAGAX,       /* inside an a-tag attribute name that is not "href" */
+       ST_TAGAQ        /* inside a double-quoted string of an a-tag other than the href value */
+};
+
+struct index_parser {  /* Scanner context kept across parse_index() calls */
+       enum http_states state; /* current state of the scanner */
+       struct url_ent *ue;     /* entry list; its address is handed to fetch_add_entry() */
+       int list_size, list_len;        /* passed by address to fetch_add_entry() with ue; presumably capacity and used count -- confirm */
+};
+
+static size_t  /* number of bytes of buf consumed; 0 means more data is needed */
+parse_index(struct index_parser *parser, const char *buf, size_t len)
+{      /* One step of the <a href="..."> scanner; the state persists in *parser. */
+       char *end_attr, p = *buf;       /* p: first byte of buf, the only byte examined outside ST_HREFQ */
+
+       switch (parser->state) {
+       case ST_NONE:
+               /* Plain text outside markup: only "<" is of interest */
+               if (p == '<')
+                       parser->state = ST_LT;
+               return 1;
+       case ST_LT:
+               /* Just after "<"; whitespace leaves the state unchanged */
+               if (p == '>')
+                       parser->state = ST_NONE;
+               else if (p == 'a' || p == 'A')
+                       parser->state = ST_LTA;
+               else if (!isspace((unsigned char)p))
+                       parser->state = ST_TAG;
+               return 1;
+       case ST_LTA:
+               /* "<a" found: whitespace confirms an a-tag, any other byte means a different tag */
+               if (p == '>')
+                       parser->state = ST_NONE;
+               else if (p == '"')
+                       parser->state = ST_TAGAQ;
+               else if (isspace((unsigned char)p))
+                       parser->state = ST_TAGA;
+               else
+                       parser->state = ST_TAG;
+               return 1;
+       case ST_TAG:
+               /* In a tag that is not "<a": skip everything up to ">" */
+               if (p == '>')
+                       parser->state = ST_NONE;
+               return 1;
+       case ST_TAGA:
+               /* In a-tag between attributes: "h" may start "href", other text is another attribute */
+               if (p == '>')
+                       parser->state = ST_NONE;
+               else if (p == '"')
+                       parser->state = ST_TAGAQ;
+               else if (p == 'h' || p == 'H')
+                       parser->state = ST_H;
+               else if (!isspace((unsigned char)p))
+                       parser->state = ST_TAGAX;
+               return 1;
+       case ST_TAGAX:
+               /* In an a-tag attribute name that is not "href": skip to whitespace, quote or ">" */
+               if (p == '>')
+                       parser->state = ST_NONE;
+               else if (p == '"')
+                       parser->state = ST_TAGAQ;
+               else if (isspace((unsigned char)p))
+                       parser->state = ST_TAGA;
+               return 1;
+       case ST_TAGAQ:
+               /* In a quoted string of an a-tag that is not the href value; ">" still ends the tag here */
+               if (p == '>')
+                       parser->state = ST_NONE;
+               else if (p == '"')
+                       parser->state = ST_TAGA;
+               return 1;
+       case ST_H:
+               /* "h" matched; expect "r" (either case), else fall back to generic attribute handling */
+               if (p == '>')
+                       parser->state = ST_NONE;
+               else if (p == '"')
+                       parser->state = ST_TAGAQ;
+               else if (p == 'r' || p == 'R')
+                       parser->state = ST_R;
+               else if (isspace((unsigned char)p))
+                       parser->state = ST_TAGA;
+               else
+                       parser->state = ST_TAGAX;
+               return 1;
+       case ST_R:
+               /* "hr" matched; expect "e" (either case) */
+               if (p == '>')
+                       parser->state = ST_NONE;
+               else if (p == '"')
+                       parser->state = ST_TAGAQ;
+               else if (p == 'e' || p == 'E')
+                       parser->state = ST_E;
+               else if (isspace((unsigned char)p))
+                       parser->state = ST_TAGA;
+               else
+                       parser->state = ST_TAGAX;
+               return 1;
+       case ST_E:
+               /* "hre" matched; expect "f" (either case) */
+               if (p == '>')
+                       parser->state = ST_NONE;
+               else if (p == '"')
+                       parser->state = ST_TAGAQ;
+               else if (p == 'f' || p == 'F')
+                       parser->state = ST_F;
+               else if (isspace((unsigned char)p))
+                       parser->state = ST_TAGA;
+               else
+                       parser->state = ST_TAGAX;
+               return 1;
+       case ST_F:
+               /* "href" matched; whitespace before "=" is skipped, any other byte means a longer attribute name */
+               if (p == '>')
+                       parser->state = ST_NONE;
+               else if (p == '"')
+                       parser->state = ST_TAGAQ;
+               else if (p == '=')
+                       parser->state = ST_HREF;
+               else if (!isspace((unsigned char)p))
+                       parser->state = ST_TAGAX;
+               return 1;
+       case ST_HREF:
+               /* "href=" matched; only a double-quoted value is collected, an unquoted one is not recorded */
+               if (p == '>')
+                       parser->state = ST_NONE;
+               else if (p == '"')
+                       parser->state = ST_HREFQ;
+               else if (!isspace((unsigned char)p))
+                       parser->state = ST_TAGA;
+               return 1;
+       case ST_HREFQ:
+               /* Inside the quoted href value: pass the whole value to fetch_add_entry(); NOTE(review): any result of that call is not inspected */
+               end_attr = memchr(buf, '"', len);       /* closing quote within the available bytes, if any */
+               if (end_attr == NULL)
+                       return 0;       /* value incomplete: consume nothing so the caller can supply more data */
+               *end_attr = '\0';       /* NOTE(review): writes into the caller's buffer although buf is declared const */
+               parser->state = ST_TAGA;
+               fetch_add_entry(&parser->ue, &parser->list_size, &parser->list_len, buf, NULL);
+               return end_attr + 1 - buf;      /* the value plus its closing quote */
+       }
+       abort();        /* reached only if parser->state matched none of the cases above */
+}
+
 /*
  * List a directory
  */
 struct url_ent *
 fetchFilteredListHTTP(struct url *url, const char *pattern, const char *flags)
 {
-       fprintf(stderr, "fetchFilteredListHTTP(): not implemented\n");
-       return (NULL);
+       fetchIO *f;     /* stream of the document at url, opened with fetchGetHTTP() below */
+       char buf[2 * PATH_MAX]; /* scan window; unconsumed bytes are kept at its front between reads */
+       size_t buf_len, processed, sum_processed;       /* unparsed bytes in buf / bytes used by last parse_index() call / bytes used since last read */
+       ssize_t read_len;
+       struct index_parser state;
+
+       state.state = ST_NONE;  /* scanning starts outside any markup */
+       state.ue = NULL;
+       state.list_size = state.list_len = 0;
+
+       f = fetchGetHTTP(url, flags);   /* NOTE(review): pattern is not referenced anywhere in this body, so no filtering happens here */
+       if (f == NULL)
+               return NULL;
+
+       buf_len = 0;
+
+       while ((read_len = fetchIO_read(f, buf + buf_len, sizeof(buf) - buf_len)) > 0) {        /* append after the unconsumed tail; NOTE(review): size is 0 once buf is full -- confirm fetchIO_read() behaviour */
+               buf_len += read_len;
+               sum_processed = 0;
+               do {
+                       processed = parse_index(&state, buf + sum_processed, buf_len);  /* buf_len counts only the bytes not yet parsed */
+                       buf_len -= processed;
+                       sum_processed += processed;
+               } while (processed != 0 && buf_len > 0);        /* stop when the parser needs more data or the window is empty */
+               memmove(buf, buf + sum_processed, buf_len);     /* slide the unconsumed tail to the front of buf */
+       }
+
+       fetchIO_close(f);
+       if (read_len < 0) {     /* a read returned a negative value: discard whatever was collected */
+               free(state.ue);
+               state.ue = NULL;
+       }
+       return state.ue;        /* list filled in through fetch_add_entry(), or NULL */
 }
 
 /*
@@ -1166,6 +1356,5 @@
 struct url_ent *
 fetchListHTTP(struct url *url, const char *flags)
 {
-       fprintf(stderr, "fetchListHTTP(): not implemented\n");
-       return (NULL);
+       return fetchFilteredList(url, "*", flags);      /* delegates to fetchFilteredList() with the pattern "*" (presumably match-all -- confirm) */
 }



Home | Main Index | Thread Index | Old Index