Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src/share/misc Catch up to 20 years of HTML and URL changes.
details: https://anonhg.NetBSD.org/src/rev/a4cd7cebfd0c
branches: trunk
changeset: 373270:a4cd7cebfd0c
user: jmcneill <jmcneill%NetBSD.org@localhost>
date: Sat Jan 28 13:12:16 2023 +0000
description:
Catch up to 20 years of HTML and URL changes.
diffstat:
share/misc/nanpa.awk | 93 ++++++++++++++++++++++++++++++++++++---------------
share/misc/nanpa.sed | 10 +++--
2 files changed, 72 insertions(+), 31 deletions(-)
diffs (166 lines):
diff -r 3f5df38f02c2 -r a4cd7cebfd0c share/misc/nanpa.awk
--- a/share/misc/nanpa.awk Sat Jan 28 12:41:07 2023 +0000
+++ b/share/misc/nanpa.awk Sat Jan 28 13:12:16 2023 +0000
@@ -1,11 +1,49 @@
-# $NetBSD: nanpa.awk,v 1.2 2003/03/13 02:55:01 jhawk Exp $
+# $NetBSD: nanpa.awk,v 1.3 2023/01/28 13:12:16 jmcneill Exp $
#
# todo:
-# parse "http://docs.nanpa.com/cgi-bin/npa_reports/nanpa?
-# function=list_npa_introduced" to produce parenthetical
-# notes about what area codes are overlayed by others
-# (or split from).
+# parse "https://nationalnanpa.com/nanp1/npa_report.csv"
+# instead of scraping HTML.
#
+function trim(s) # Return s with leading and trailing spaces/tabs removed.
+{
+ gsub(/^[ \t]+|[ \t]+$/, "", s); # s is a by-value scalar parameter, so the caller's string is not modified
+ return s;
+}
+function mapinit(postdb) # Load the file named by postdb into the location/flocation/country/fcountry lookup tables.
+{
+ while ((getline < postdb) > 0) { # each successful read replaces $0; loop ends on EOF (0) or read error (-1)
+ sub(/#.*/, ""); # drop everything from the first "#" (comment text)
+ if (length($0)==0) continue; # skip lines that are empty once comments are removed
+ NF=split($0, f); # split on FS into the global array f; the assignment also resets NF for the current record
+ location[f[1]] = f[2]; # field 1 (lookup key) -> field 2 (location name)
+ flocation[tolower(f[2])] = f[2]; # lowercased name -> name exactly as spelled in postdb
+ country[f[1]] = f[4]; # field 1 -> field 4 (presumably a country code such as "CA"/"US" - confirm against postdb; field 3 is not used)
+ fcountry[tolower(f[2])] = f[4]; # lowercased name -> field 4
+ }
+}
+function countrymap(s) # Map the codes "CA" and "US" to the display names "Canada" and "USA".
+{
+ if (s == "CA") return "Canada";
+ if (s == "US") return "USA";
+ return s; # any other value is returned unchanged
+}
+function locationmap(s, t) # Return the display form of location s using the tables filled by mapinit(); t is a local, callers pass only s.
+{
+ if (s in location) { # case 1: s is a key of location[] (field 1 of the postal file); match is case-sensitive
+ t = location[s];
+ if (s in country) {
+ t = t " (" countrymap(country[s]) ")"; # append the country name in parentheses
+ }
+ } else if (tolower(s) in flocation) { # case 2: s matches a known location name, ignoring case
+ t = flocation[tolower(s)]; # use the spelling stored by mapinit()
+ if (tolower(s) in fcountry) {
+ t = t " (" countrymap(fcountry[tolower(s)]) ")";
+ }
+ } else { # case 3: unknown location, pass s through unchanged
+ t = s;
+ }
+ return t;
+}
function parse(file, ispipe, isplanning, i, planinit, t)
{
planinit = 0;
@@ -13,30 +51,30 @@
sub(/#.*/, "");
if (length($0)==0) continue;
if (isplanning) {
- split($0, f);
- if (!planinit && f[2]=="NEW NPA") {
+ NF=split($0, f);
+ if (!planinit && f[2]=="New NPA") {
planinit=1;
for (i=1; i<=NF; i++)
- fnames[$i]=i-1;
- } else if (planinit && length(f[fnames["NEW NPA"]])>1) {
- t = f[fnames["LOCATION"]] FS;
- if (f[fnames["OVERLAY?"]]=="Yes")
- t = t "Overlay of " f[fnames["OLD NPA"]];
- else if (f[fnames["OLD NPA"]])
- t = t "Split of " f[fnames["OLD NPA"]];
- if (f[fnames["STATUS"]])
- t = t " (" f[fnames["STATUS"]] ")";
- if (length(f[fnames["IN SERVICE DATE"]]) > 1)
+ fnames[f[i]]=i-1;
+ } else if (planinit && length(f[fnames["New NPA"]])>1) {
+ t = locationmap(trim(f[fnames["Location"]])) FS;
+ if (trim(f[fnames["Overlay?"]])=="Yes")
+ t = t "Overlay of " trim(f[fnames["Old NPA"]]);
+ else if (f[fnames["Old NPA"]])
+ t = t "Split of " trim(f[fnames["Old NPA"]]);
+ if (f[fnames["Status"]])
+ t = t " (" trim(f[fnames["Status"]]) ")";
+ if (length(f[fnames["In Service Date"]]) > 1)
t = t " effective " \
- f[fnames["IN SERVICE DATE"]];
- data[f[fnames["NEW NPA"]] "*"] = t;
+ trim(f[fnames["In Service Date"]]);
+ data[trim(f[fnames["New NPA"]]) "*"] = t;
}
} else {
# digits only
match($0, /^[0-9]/);
if (RSTART==0) continue;
i=index($0, FS);
- data[substr($0, 1, i-1)]=substr($0,i+1);
+ data[substr($0, 1, i-1)]=locationmap(trim(substr($0,i+1)));
}
}
close(file);
@@ -44,8 +82,9 @@
BEGIN{
FS=":"
+ mapinit("na.postal");
print "# $""NetBSD: $";
- print "# Generated from http://www.nanpa.com/area_codes/index.html";
+ print "# Generated from https://nationalnanpa.com/area_codes/index.html";
print "# (with local exceptions)";
print "# ";
print "# format:";
@@ -54,14 +93,14 @@
print "# A * in the Area Code field indicates a future area code."
print "# ";
parse("ftp -o - " \
- "http://docs.nanpa.com/cgi-bin/npa_reports/nanpa\\?" \
- "function=list_npa_geo_number | sed -f nanpa.sed", 1, 0);
+ "https://nationalnanpa.com/enas/geoAreaCodeNumberReport.do" \
+ " | sed -f nanpa.sed", 1, 0);
parse("ftp -o - " \
- "http://docs.nanpa.com/cgi-bin/npa_reports/nanpa\\?" \
- "function=list_npa_non_geo | sed -f nanpa.sed", 1, 0);
+ "https://nationalnanpa.com/enas/nonGeoNpaServiceReport.do" \
+ " | sed -f nanpa.sed", 1, 0);
parse("ftp -o - " \
- "http://docs.nanpa.com/cgi-bin/npa_reports/nanpa\\?" \
- "function=list_npa_not_in_service | sed -f nanpa.sed", 1, 1);
+ "https://nationalnanpa.com/enas/plannedNpasNotInServiceReport.do" \
+ " | sed -f nanpa.sed", 1, 1);
parse("na.phone.add", 0, 0);
sort="sort -n";
for (i in data)
diff -r 3f5df38f02c2 -r a4cd7cebfd0c share/misc/nanpa.sed
--- a/share/misc/nanpa.sed Sat Jan 28 12:41:07 2023 +0000
+++ b/share/misc/nanpa.sed Sat Jan 28 13:12:16 2023 +0000
@@ -1,4 +1,4 @@
-# $NetBSD: nanpa.sed,v 1.2 2006/12/25 18:39:48 wiz Exp $
+# $NetBSD: nanpa.sed,v 1.3 2023/01/28 13:12:16 jmcneill Exp $
#
# Parse HTML tables output by
# http://docs.nanpa.com/cgi-bin/npa_reports/nanpa
@@ -34,7 +34,7 @@
# Remove lines not starting with <TR>
/<[Tt][Rr][^>]*>/!d
# Replace all <TD> with colon
-s/[ ]*<TD[^>]*> */:/g
+s/[ ]*<[Tt][Dd][^>]*> */:/g
# Strip all HTML tags
s/<[^>]*>//g
# Handle HTML characters
@@ -42,7 +42,9 @@
# Compress spaces/tabs
s/[ ][ ]*/ /g
# Strip leading colons
-s/^://
+s/://
# Strip leading/trailing whitespace
-s/^ //
+s/ *//
s/ $//
+# Strip HTML comments
+s/^--.*$//
Home |
Main Index |
Thread Index |
Old Index