Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src/usr.bin/make make: speed up pattern matching in the ':M' and...
details: https://anonhg.NetBSD.org/src/rev/655332ec3dbe
branches: trunk
changeset: 376574:655332ec3dbe
user: rillig <rillig%NetBSD.org@localhost>
date: Thu Jun 22 12:59:54 2023 +0000
description:
make: speed up pattern matching in the ':M' and ':N' modifiers
In the code coverage report, the highest count for Str_Match goes from
5,298,924 down to 79,646.
diffstat:
usr.bin/make/str.c | 67 ++++++++++++++++++++++++--------
usr.bin/make/unit-tests/varmod-match.mk | 20 ++++++--
2 files changed, 63 insertions(+), 24 deletions(-)
diffs (154 lines):
diff -r 2176722211aa -r 655332ec3dbe usr.bin/make/str.c
--- a/usr.bin/make/str.c Thu Jun 22 09:09:08 2023 +0000
+++ b/usr.bin/make/str.c Thu Jun 22 12:59:54 2023 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: str.c,v 1.94 2022/12/07 10:28:48 rillig Exp $ */
+/* $NetBSD: str.c,v 1.95 2023/06/22 12:59:54 rillig Exp $ */
/*
* Copyright (c) 1988, 1989, 1990, 1993
@@ -71,7 +71,7 @@
#include "make.h"
/* "@(#)str.c 5.8 (Berkeley) 6/1/90" */
-MAKE_RCSID("$NetBSD: str.c,v 1.94 2022/12/07 10:28:48 rillig Exp $");
+MAKE_RCSID("$NetBSD: str.c,v 1.95 2023/06/22 12:59:54 rillig Exp $");
static HashTable interned_strings;
@@ -323,19 +323,18 @@ in_range(char e1, char c, char e2)
bool
Str_Match(const char *str, const char *pat)
{
- for (; *pat != '\0'; pat++, str++) {
- if (*pat == '*') { /* match any substring */
- pat++;
- while (*pat == '*')
- pat++;
- if (*pat == '\0')
- return true;
- for (; *str != '\0'; str++)
- if (Str_Match(str, pat))
- return true;
- return false;
- }
+ enum { MFL_START, MFL_MIDDLE, MFL_END } mfl;
+ const char *str1, *pat1;
+ bool matched;
+ mfl = MFL_START;
+ str1 = str;
+ pat1 = pat;
+match_fixed_length:
+ str = str1;
+ pat = pat1;
+ matched = false;
+ for (; *pat != '\0' && *pat != '*'; str++, pat++) {
if (*str == '\0')
return false;
@@ -350,7 +349,7 @@ Str_Match(const char *str, const char *p
if (*pat == ']' || *pat == '\0') {
if (neg)
break;
- return false;
+ goto match_done;
}
if (*pat == *str)
break;
@@ -364,7 +363,7 @@ Str_Match(const char *str, const char *p
pat++;
}
if (neg && *pat != ']' && *pat != '\0')
- return false;
+ goto match_done;
while (*pat != ']' && *pat != '\0')
pat++;
if (*pat == '\0')
@@ -374,11 +373,43 @@ Str_Match(const char *str, const char *p
if (*pat == '\\') /* match the next character exactly */
pat++;
+ if (*pat != *str)
+ goto match_done;
+ }
+ matched = true;
- if (*pat != *str)
+match_done:
+ switch (mfl) {
+ case MFL_START:
+ if (!matched)
return false;
+ if (*pat == '\0')
+ return *str == '\0';
+ mfl = MFL_MIDDLE;
+ break;
+ default:
+ if (!matched) {
+ str1++;
+ goto match_fixed_length;
+ }
+ if (*pat == '\0') {
+ mfl = MFL_END;
+ str1 = str + strlen(str) - (str - str1);
+ goto match_fixed_length;
+ }
+ break;
+ case MFL_END:
+ return matched;
}
- return *str == '\0';
+
+ while (*pat == '*')
+ pat++;
+ if (*pat == '\0')
+ return true;
+ if (*str == '\0')
+ return false;
+ str1 = str, pat1 = pat;
+ goto match_fixed_length;
}
void
diff -r 2176722211aa -r 655332ec3dbe usr.bin/make/unit-tests/varmod-match.mk
--- a/usr.bin/make/unit-tests/varmod-match.mk Thu Jun 22 09:09:08 2023 +0000
+++ b/usr.bin/make/unit-tests/varmod-match.mk Thu Jun 22 12:59:54 2023 +0000
@@ -1,4 +1,4 @@
-# $NetBSD: varmod-match.mk,v 1.13 2023/06/22 09:09:08 rillig Exp $
+# $NetBSD: varmod-match.mk,v 1.14 2023/06/22 12:59:54 rillig Exp $
#
# Tests for the :M variable modifier, which filters words that match the
# given pattern.
@@ -33,11 +33,12 @@ NUMBERS= One Two Three Four five six sev
.if ${:U****************:M****************b}
.endif
-# As of 2023-06-22, this expression calls Str_Match 2,621,112 times.
-# Adding another '*?' to the pattern calls Str_Match 20,630,572 times.
-# Adding another '*?' to the pattern calls Str_Match 136,405,672 times.
-# Adding another '*?' to the pattern calls Str_Match 773,168,722 times.
-# Adding another '*?' to the pattern calls Str_Match 3,815,481,072 times.
+# Before 2023-06-22, this expression called Str_Match 2,621,112 times.
+# Adding another '*?' to the pattern called Str_Match 20,630,572 times.
+# Adding another '*?' to the pattern called Str_Match 136,405,672 times.
+# Adding another '*?' to the pattern called Str_Match 773,168,722 times.
+# Adding another '*?' to the pattern called Str_Match 3,815,481,072 times.
+# Since 2023-06-22, Str_Match no longer backtracks.
.if ${:U..................................................b:M*?*?*?*?*?a}
.endif
@@ -217,6 +218,13 @@ WORDS= - + x xx 0 1 2 3 4 [x1-3
. error
.endif
+# *[-x1-3 Incomplete character list after a wildcard, matches those
+# words that end with one of the characters from the list.
+WORDS= - + x xx 0 1 2 3 4 00 01 10 11 000 001 010 011 100 101 110 111 [x1-3
+.if ${WORDS:M*[-x1-3} != "- x xx 1 2 3 01 11 001 011 101 111 [x1-3"
+. warning ${WORDS:M*[-x1-3}
+.endif
+
# [^-x1-3
# Incomplete negated character list, matches any character
# except those elements that can be parsed without lookahead.
Home |
Main Index |
Thread Index |
Old Index