Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src/usr.bin/less/less Nuke unused V8 regexp.
details: https://anonhg.NetBSD.org/src/rev/e8742d117bad
branches: trunk
changeset: 475960:e8742d117bad
user: kleink <kleink%NetBSD.org@localhost>
date: Tue Aug 31 21:05:07 1999 +0000
description:
Nuke unused V8 regexp.
diffstat:
usr.bin/less/less/regexp.c | 1237 --------------------------------------------
usr.bin/less/less/regexp.h | 35 -
2 files changed, 0 insertions(+), 1272 deletions(-)
diffs (truncated from 1280 to 300 lines):
diff -r 04d341739413 -r e8742d117bad usr.bin/less/less/regexp.c
--- a/usr.bin/less/less/regexp.c Tue Aug 31 13:58:58 1999 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,1237 +0,0 @@
-/* $NetBSD: regexp.c,v 1.1.1.4 1999/04/06 05:30:37 mrg Exp $ */
-
-/*
- * regcomp and regexec -- regsub and regerror are elsewhere
- *
- * Copyright (c) 1986 by University of Toronto.
- * Written by Henry Spencer. Not derived from licensed software.
- *
- * Permission is granted to anyone to use this software for any
- * purpose on any computer system, and to redistribute it freely,
- * subject to the following restrictions:
- *
- * 1. The author is not responsible for the consequences of use of
- * this software, no matter how awful, even if they arise
- * from defects in it.
- *
- * 2. The origin of this software must not be misrepresented, either
- * by explicit claim or by omission.
- *
- * 3. Altered versions must be plainly marked as such, and must not
- * be misrepresented as being the original software.
- *
- * Beware that some of this code is subtly aware of the way operator
- * precedence is structured in regular expressions. Serious changes in
- * regular-expression syntax might require a total rethink.
- *
- * *** NOTE: this code has been altered slightly for use in Tcl. ***
- * Slightly modified by David MacKenzie to undo most of the changes for TCL.
- */
-
-#include "less.h"
-#if HAVE_STDIO_H
-#include <stdio.h>
-#endif
-#if STDC_HEADERS
-#include <stdlib.h>
-#include <string.h>
-#endif
-#include "regexp.h"
-
-/*
- * The "internal use only" fields in regexp.h are present to pass info from
- * compile to execute that permits the execute phase to run lots faster on
- * simple cases. They are:
- *
- * regstart char that must begin a match; '\0' if none obvious
- * reganch is the match anchored (at beginning-of-line only)?
- * regmust string (pointer into program) that match must include, or NULL
- * regmlen length of regmust string
- *
- * Regstart and reganch permit very fast decisions on suitable starting points
- * for a match, cutting down the work a lot. Regmust permits fast rejection
- * of lines that cannot possibly match. The regmust tests are costly enough
- * that regcomp() supplies a regmust only if the r.e. contains something
- * potentially expensive (at present, the only such thing detected is * or +
- * at the start of the r.e., which can involve a lot of backup). Regmlen is
- * supplied because the test in regexec() needs it and regcomp() is
- * computing it anyway.
- */
-
-/*
- * Structure for regexp "program". This is essentially a linear encoding
- * of a nondeterministic finite-state machine (aka syntax charts or
- * "railroad normal form" in parsing technology). Each node is an opcode
- * plus a "next" pointer, possibly plus an operand. "Next" pointers of
- * all nodes except BRANCH implement concatenation; a "next" pointer with
- * a BRANCH on both ends of it is connecting two alternatives. (Here we
- * have one of the subtle syntax dependencies: an individual BRANCH (as
- * opposed to a collection of them) is never concatenated with anything
- * because of operator precedence.) The operand of some types of node is
- * a literal string; for others, it is a node leading into a sub-FSM. In
- * particular, the operand of a BRANCH node is the first node of the branch.
- * (NB this is *not* a tree structure: the tail of the branch connects
- * to the thing following the set of BRANCHes.) The opcodes are:
- */
-
-/* definition number opnd? meaning */
-#undef EOL
-#define END 0 /* no End of program. */
-#define BOL 1 /* no Match "" at beginning of line. */
-#define EOL 2 /* no Match "" at end of line. */
-#define ANY 3 /* no Match any one character. */
-#define ANYOF 4 /* str Match any character in this string. */
-#define ANYBUT 5 /* str Match any character not in this string. */
-#define BRANCH 6 /* node Match this alternative, or the next... */
-#define BACK 7 /* no Match "", "next" ptr points backward. */
-#define EXACTLY 8 /* str Match this string. */
-#define NOTHING 9 /* no Match empty string. */
-#define STAR 10 /* node Match this (simple) thing 0 or more times. */
-#define PLUS 11 /* node Match this (simple) thing 1 or more times. */
-#define OPEN 20 /* no Mark this point in input as start of #n. */
- /* OPEN+1 is number 1, etc. */
-#define CLOSE 30 /* no Analogous to OPEN. */
-
-/*
- * Opcode notes:
- *
- * BRANCH The set of branches constituting a single choice are hooked
- * together with their "next" pointers, since precedence prevents
- * anything being concatenated to any individual branch. The
- * "next" pointer of the last BRANCH in a choice points to the
- * thing following the whole choice. This is also where the
- * final "next" pointer of each individual branch points; each
- * branch starts with the operand node of a BRANCH node.
- *
- * BACK Normal "next" pointers all implicitly point forward; BACK
- * exists to make loop structures possible.
- *
- * STAR,PLUS '?', and complex '*' and '+', are implemented as circular
- * BRANCH structures using BACK. Simple cases (one character
- * per match) are implemented with STAR and PLUS for speed
- * and to minimize recursive plunges.
- *
- * OPEN,CLOSE ...are numbered at compile time.
- */
-
-/*
- * A node is one char of opcode followed by two chars of "next" pointer.
- * "Next" pointers are stored as two 8-bit pieces, high order first. The
- * value is a positive offset from the opcode of the node containing it.
- * An operand, if any, simply follows the node. (Note that much of the
- * code generation knows about this implicit relationship.)
- *
- * Using two bytes for the "next" pointer is vast overkill for most things,
- * but allows patterns to get big without disasters.
- */
-#define OP(p) (*(p))
-#define NEXT(p) (((*((p)+1)&0377)<<8) + (*((p)+2)&0377))
-#define OPERAND(p) ((p) + 3)
-
-/*
- * See regmagic.h for one further detail of program structure.
- */
-
-
-/*
- * Utility definitions.
- */
-#ifndef CHARBITS
-#define UCHARAT(p) ((int)*(unsigned char *)(p))
-#else
-#define UCHARAT(p) ((int)*(p)&CHARBITS)
-#endif
-
-#define FAIL(m) { regerror(m); return(NULL); }
-#define ISMULT(c) ((c) == '*' || (c) == '+' || (c) == '?')
-#define META "^$.[()|?+*\\"
-
-/*
- * Flags to be passed up and down.
- */
-#define HASWIDTH 01 /* Known never to match null string. */
-#define SIMPLE 02 /* Simple enough to be STAR/PLUS operand. */
-#define SPSTART 04 /* Starts with * or +. */
-#define WORST 0 /* Worst case. */
-
-/*
- * Global work variables for regcomp().
- */
-static char *regparse; /* Input-scan pointer. */
-static int regnpar; /* () count. */
-static char regdummy;
-static char *regcode; /* Code-emit pointer; ®dummy = don't. */
-static long regsize; /* Code size. */
-
-/*
- * The first byte of the regexp internal "program" is actually this magic
- * number; the start node begins in the second byte.
- */
-#define MAGIC 0234
-
-
-/*
- * Forward declarations for regcomp()'s friends.
- */
-#ifndef STATIC
-#define STATIC static
-#endif
-STATIC char *reg();
-STATIC char *regbranch();
-STATIC char *regpiece();
-STATIC char *regatom();
-STATIC char *regnode();
-STATIC char *regnext();
-STATIC void regc();
-STATIC void reginsert();
-STATIC void regtail();
-STATIC void regoptail();
-#ifdef STRCSPN
-STATIC int strcspn();
-#endif
-
-/*
- - regcomp - compile a regular expression into internal code
- *
- * We can't allocate space until we know how big the compiled form will be,
- * but we can't compile it (and thus know how big it is) until we've got a
- * place to put the code. So we cheat: we compile it twice, once with code
- * generation turned off and size counting turned on, and once "for real".
- * This also means that we don't allocate space until we are sure that the
- * thing really will compile successfully, and we never have to move the
- * code and thus invalidate pointers into it. (Note that it has to be in
- * one piece because free() must be able to free it all.)
- *
- * Beware that the optimization-preparation code in here knows about some
- * of the structure of the compiled regexp.
- */
-regexp *
-regcomp(exp)
-char *exp;
-{
- register regexp *r;
- register char *scan;
- register char *longest;
- register int len;
- int flags;
-
- if (exp == NULL)
- FAIL("NULL argument");
-
- /* First pass: determine size, legality. */
- regparse = exp;
- regnpar = 1;
- regsize = 0L;
- regcode = ®dummy;
- regc(MAGIC);
- if (reg(0, &flags) == NULL)
- return(NULL);
-
- /* Small enough for pointer-storage convention? */
- if (regsize >= 32767L) /* Probably could be 65535L. */
- FAIL("regexp too big");
-
- /* Allocate space. */
- r = (regexp *)malloc(sizeof(regexp) + (unsigned)regsize);
- if (r == NULL)
- FAIL("out of space");
-
- /* Second pass: emit code. */
- regparse = exp;
- regnpar = 1;
- regcode = r->program;
- regc(MAGIC);
- if (reg(0, &flags) == NULL)
- return(NULL);
-
- /* Dig out information for optimizations. */
- r->regstart = '\0'; /* Worst-case defaults. */
- r->reganch = 0;
- r->regmust = NULL;
- r->regmlen = 0;
- scan = r->program+1; /* First BRANCH. */
- if (OP(regnext(scan)) == END) { /* Only one top-level choice. */
- scan = OPERAND(scan);
-
- /* Starting-point info. */
- if (OP(scan) == EXACTLY)
- r->regstart = *OPERAND(scan);
- else if (OP(scan) == BOL)
- r->reganch++;
-
- /*
- * If there's something expensive in the r.e., find the
- * longest literal string that must appear and make it the
- * regmust. Resolve ties in favor of later strings, since
- * the regstart check works with the beginning of the r.e.
- * and avoiding duplication strengthens checking. Not a
- * strong reason, but sufficient in the absence of others.
- */
- if (flags&SPSTART) {
- longest = NULL;
- len = 0;
- for (; scan != NULL; scan = regnext(scan))
- if (OP(scan) == EXACTLY && ((int) strlen(OPERAND(scan))) >= len) {
- longest = OPERAND(scan);
- len = strlen(OPERAND(scan));
- }
- r->regmust = longest;
- r->regmlen = len;
- }
- }
-
- return(r);
-}
-
-/*
- - reg - regular expression, i.e. main body or parenthesized thing
- *
- * Caller must absorb opening parenthesis.
- *
- * Combining parenthesis handling with the base level of regular expression
- * is a trifle forced, but the need to tie the tails of the branches to what
- * follows makes it hard to avoid.
- */
-static char *
-reg(paren, flagp)
Home |
Main Index |
Thread Index |
Old Index