tech-userlevel archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
Re: Changing fgetstr/fgetln to use getdelim
Roy Marples wrote:
Attached is a patch that changes libc fgetstr/fgetln to use getdelim(3).
This saves over 200 bytes on amd64 (although adding getdelim in the
process probably added more still - heh).
This new version makes __getdelim use the same interface as getdelim,
but returns 0 on EOF and works with strings of SIZE_MAX - 2, allowing a
NULL terminator and the error code -1. Maybe this should be propsed to
the OpenGroup as fgetdelim and fix all 3 issues a few people have with
getdelim?
Thanks
Roy
Index: stdio/fgetln.c
===================================================================
RCS file: /cvsroot/src/lib/libc/stdio/fgetln.c,v
retrieving revision 1.14
diff -u -p -r1.14 fgetln.c
--- stdio/fgetln.c 10 May 2004 16:47:11 -0000 1.14
+++ stdio/fgetln.c 21 Sep 2009 07:25:21 -0000
@@ -64,10 +64,6 @@ fgetln(fp, lenp)
FILE *fp;
size_t *lenp;
{
- char *cp;
-
- FLOCKFILE(fp);
- cp = __fgetstr(fp, lenp, '\n');
- FUNLOCKFILE(fp);
- return cp;
+
+ return __fgetstr(fp, lenp, '\n');
}
Index: stdio/fgetstr.c
===================================================================
RCS file: /cvsroot/src/lib/libc/stdio/fgetstr.c,v
retrieving revision 1.5
diff -u -p -r1.5 fgetstr.c
--- stdio/fgetstr.c 31 Jan 2009 06:14:13 -0000 1.5
+++ stdio/fgetstr.c 21 Sep 2009 07:25:21 -0000
@@ -1,11 +1,10 @@
-/* $NetBSD: fgetstr.c,v 1.5 2009/01/31 06:14:13 lukem Exp $ */
+/* $NetBSD: fgetstr.c,v 1.5 2009/01/31 06:14:13 lukem Exp $ */
-/*-
- * Copyright (c) 1990, 1993
- * The Regents of the University of California. All rights reserved.
+/*
+ * Copyright (c) 2009 The NetBSD Foundation, Inc.
*
- * This code is derived from software contributed to Berkeley by
- * Chris Torek.
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Roy Marples.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -15,160 +14,60 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
*
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/cdefs.h>
-#if defined(LIBC_SCCS) && !defined(lint)
-#if 0
-static char sccsid[] = "@(#)fgetline.c 8.1 (Berkeley) 6/4/93";
-#else
-__RCSID("$NetBSD: fgetstr.c,v 1.5 2009/01/31 06:14:13 lukem Exp $");
-#endif
-#endif /* LIBC_SCCS and not lint */
+__RCSID("$NetBSD: fgetstr.c,v 1.3 2009/07/14 18:29:41 roy Exp $");
#include "namespace.h"
#include <assert.h>
+#include <errno.h>
+#include <limits.h>
#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
+
#include "reentrant.h"
#include "local.h"
/*
- * Expand the line buffer. Return -1 on error.
-#ifdef notdef
- * The `new size' does not account for a terminating '\0',
- * so we add 1 here.
-#endif
- */
-int
-__slbexpand(fp, newsize)
- FILE *fp;
- size_t newsize;
-{
- void *p;
-
-#ifdef notdef
- ++newsize;
-#endif
- _DIAGASSERT(fp != NULL);
-
- if ((size_t)fp->_lb._size >= newsize)
- return (0);
- if ((p = realloc(fp->_lb._base, newsize)) == NULL)
- return (-1);
- fp->_lb._base = p;
- fp->_lb._size = newsize;
- return (0);
-}
-
-/*
- * Get an input line. The returned pointer often (but not always)
- * points into a stdio buffer. Fgetline does not alter the text of
- * the returned line (which is thus not a C string because it will
- * not necessarily end with '\0'), but does allow callers to modify
- * it if they wish. Thus, we set __SMOD in case the caller does.
+ * Get an input line.
+ * This now uses getdelim(3) for a code reduction.
+ * The upside is that strings are now always NULL terminated, but relying
+ * on this is non portable - better to use the POSIX getdelim(3) function.
*/
char *
-__fgetstr(fp, lenp, sep)
- FILE *fp;
- size_t *lenp;
- int sep;
+__fgetstr(FILE *__restrict fp, size_t *__restrict lenp, int sep)
{
- unsigned char *p;
- size_t len;
- size_t off;
+ char *p;
+ size_t size;
_DIAGASSERT(fp != NULL);
_DIAGASSERT(lenp != NULL);
- /* make sure there is input */
- if (fp->_r <= 0 && __srefill(fp)) {
- *lenp = 0;
- return (NULL);
- }
-
- /* look for a newline in the input */
- if ((p = memchr((void *)fp->_p, sep, (size_t)fp->_r)) != NULL) {
- char *ret;
-
- /*
- * Found one. Flag buffer as modified to keep fseek from
- * `optimising' a backward seek, in case the user stomps on
- * the text.
- */
- p++; /* advance over it */
- ret = (char *)fp->_p;
- *lenp = len = p - fp->_p;
- fp->_flags |= __SMOD;
- fp->_r -= len;
- fp->_p = p;
- return (ret);
+ p = (char *)fp->_lb._base;
+ size = fp->_lb._size;
+ *lenp = __getdelim(&p, &size, sep, fp);
+ /* The struct size variable is only an int ..... */
+ if (size > INT_MAX) {
+ fp->_lb._size = INT_MAX;
+ errno = EOVERFLOW;
+ goto error;
}
-
- /*
- * We have to copy the current buffered data to the line buffer.
- * As a bonus, though, we can leave off the __SMOD.
- *
- * OPTIMISTIC is length that we (optimistically) expect will
- * accommodate the `rest' of the string, on each trip through the
- * loop below.
- */
-#define OPTIMISTIC 80
-
- for (len = fp->_r, off = 0;; len += fp->_r) {
- size_t diff;
-
- /*
- * Make sure there is room for more bytes. Copy data from
- * file buffer to line buffer, refill file and look for
- * newline. The loop stops only when we find a newline.
- */
- if (__slbexpand(fp, len + OPTIMISTIC))
- goto error;
- (void)memcpy((void *)(fp->_lb._base + off), (void *)fp->_p,
- len - off);
- off = len;
- if (__srefill(fp))
- break; /* EOF or error: return partial line */
- if ((p = memchr((void *)fp->_p, sep, (size_t)fp->_r)) == NULL)
- continue;
-
- /* got it: finish up the line (like code above) */
- p++;
- diff = p - fp->_p;
- len += diff;
- if (__slbexpand(fp, len))
- goto error;
- (void)memcpy((void *)(fp->_lb._base + off), (void *)fp->_p,
- diff);
- fp->_r -= diff;
- fp->_p = p;
- break;
- }
- *lenp = len;
-#ifdef notdef
- fp->_lb._base[len] = 0;
-#endif
- return ((char *)fp->_lb._base);
-
+ fp->_lb._size = (int)size;
+ if (*lenp < SIZE_MAX)
+ return p;
error:
- *lenp = 0; /* ??? */
- return (NULL); /* ??? */
+ *lenp = 0;
+ return NULL;
}
Index: stdio/fgetwln.c
===================================================================
RCS file: /cvsroot/src/lib/libc/stdio/fgetwln.c,v
retrieving revision 1.2
diff -u -p -r1.2 fgetwln.c
--- stdio/fgetwln.c 31 Jan 2009 06:08:28 -0000 1.2
+++ stdio/fgetwln.c 21 Sep 2009 07:25:21 -0000
@@ -36,7 +36,11 @@ __RCSID("$NetBSD: fgetwln.c,v 1.2 2009/0
#endif /* LIBC_SCCS and not lint */
#include "namespace.h"
+#include <assert.h>
+#include <errno.h>
+#include <limits.h>
#include <stdio.h>
+#include <stdlib.h>
#include <wchar.h>
#include "reentrant.h"
#include "local.h"
@@ -45,6 +49,37 @@ __RCSID("$NetBSD: fgetwln.c,v 1.2 2009/0
__weak_alias(fgetwln,_fgetwln)
#endif
+/*
+ * Expand the line buffer. Return -1 on error.
+#ifdef notdef
+ * The `new size' does not account for a terminating '\0',
+ * so we add 1 here.
+#endif
+ */
+static int
+__slbexpand(FILE *fp, size_t newsize)
+{
+ void *p;
+
+#ifdef notdef
+ ++newsize;
+#endif
+ _DIAGASSERT(fp != NULL);
+
+ /* fp->_lb._size is an int ..... */
+ if (newsize > INT_MAX) {
+ errno = EOVERFLOW;
+ return (-1);
+ }
+ if ((size_t)fp->_lb._size >= newsize)
+ return (0);
+ if ((p = realloc(fp->_lb._base, newsize)) == NULL)
+ return (-1);
+ fp->_lb._base = p;
+ fp->_lb._size = newsize;
+ return (0);
+}
+
wchar_t *
fgetwln(FILE * __restrict fp, size_t *lenp)
{
Index: stdio/getdelim.c
===================================================================
RCS file: /cvsroot/src/lib/libc/stdio/getdelim.c,v
retrieving revision 1.3
diff -u -p -r1.3 getdelim.c
--- stdio/getdelim.c 14 Jul 2009 18:29:41 -0000 1.3
+++ stdio/getdelim.c 21 Sep 2009 07:25:21 -0000
@@ -30,6 +30,8 @@
#include <sys/cdefs.h>
__RCSID("$NetBSD: getdelim.c,v 1.3 2009/07/14 18:29:41 roy Exp $");
+#include "namespace.h"
+
#include <sys/param.h>
#include <assert.h>
@@ -47,12 +49,15 @@ __RCSID("$NetBSD: getdelim.c,v 1.3 2009/
* without the need for a realloc. */
#define MINBUF 128
+/* This private function allows strings of upto SIZE_MAX - 2
+ * and returns 0 on EOF, both of which are disallowed by POSIX.
+ * Maybe this should be named fgetdelim and proposed to the OpenGroup....*/
ssize_t
-getdelim(char **__restrict buf, size_t *__restrict buflen,
+__getdelim(char **__restrict buf, size_t *__restrict buflen,
int sep, FILE *__restrict fp)
{
unsigned char *p;
- size_t len, off, newlen;
+ size_t len, newlen, off;
char *newb;
_DIAGASSERT(fp != NULL);
@@ -72,8 +77,7 @@ getdelim(char **__restrict buf, size_t *
for (;;) {
/* If the input buffer is empty, refill it */
if (fp->_r <= 0 && __srefill(fp)) {
- /* POSIX requires we return -1 on EOF */
- if (off == 0 || __sferror(fp))
+ if (__sferror(fp))
goto error;
break;
}
@@ -85,13 +89,12 @@ getdelim(char **__restrict buf, size_t *
else
len = (p - fp->_p) + 1;
- newlen = off + len;
- /* Ensure that the resultant buffer length fits in ssize_t */
- if (newlen > (size_t)SSIZE_MAX) {
+ newlen = off + len + 1;
+ /* Ensure we can handle it */
+ if (newlen < off || newlen > SIZE_MAX - 2) {
errno = EOVERFLOW;
goto error;
}
- newlen++; /* reserve space for the NULL terminator */
if (newlen > *buflen) {
if (newlen < MINBUF)
newlen = MINBUF;
@@ -133,3 +136,21 @@ error:
FUNLOCKFILE(fp);
return -1;
}
+
+ssize_t
+getdelim(char **__restrict buf, size_t *__restrict buflen,
+ int sep, FILE *__restrict fp)
+{
+ ssize_t len;
+
+ len = __getdelim(buf, buflen, sep, fp);
+ if (len == 0) {
+ /* POSIX requires that we return -1 on EOF */
+ return -1;
+ } else if (len < -1) {
+ /* POSIX requires no string larger than SSIZE_MAX */
+ errno = EOVERFLOW;
+ return -1;
+ }
+ return len;
+}
Index: stdio/local.h
===================================================================
RCS file: /cvsroot/src/lib/libc/stdio/local.h,v
retrieving revision 1.20
diff -u -p -r1.20 local.h
--- stdio/local.h 14 May 2005 23:51:02 -0000 1.20
+++ stdio/local.h 21 Sep 2009 07:25:21 -0000
@@ -75,8 +75,9 @@ extern int __gettemp __P((char *, int *,
extern wint_t __fgetwc_unlock __P((FILE *));
extern wint_t __fputwc_unlock __P((wchar_t, FILE *));
+extern ssize_t __getdelim __P((char ** __restrict, size_t * __restrict, int,
+ FILE * __restrict));
extern char *__fgetstr __P((FILE * __restrict, size_t * __restrict, int));
-extern int __slbexpand __P((FILE *, size_t));
extern int __vfwprintf_unlocked __P((FILE *, const wchar_t *,
_BSD_VA_LIST_));
extern int __vfwscanf_unlocked __P((FILE * __restrict,
Home |
Main Index |
Thread Index |
Old Index