Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/share/misc Catch up to 20 years of HTML and URL changes.



details:   https://anonhg.NetBSD.org/src/rev/a4cd7cebfd0c
branches:  trunk
changeset: 373270:a4cd7cebfd0c
user:      jmcneill <jmcneill%NetBSD.org@localhost>
date:      Sat Jan 28 13:12:16 2023 +0000

description:
Catch up to 20 years of HTML and URL changes.

diffstat:

 share/misc/nanpa.awk |  93 ++++++++++++++++++++++++++++++++++++---------------
 share/misc/nanpa.sed |  10 +++--
 2 files changed, 72 insertions(+), 31 deletions(-)

diffs (166 lines):

diff -r 3f5df38f02c2 -r a4cd7cebfd0c share/misc/nanpa.awk
--- a/share/misc/nanpa.awk      Sat Jan 28 12:41:07 2023 +0000
+++ b/share/misc/nanpa.awk      Sat Jan 28 13:12:16 2023 +0000
@@ -1,11 +1,49 @@
-# $NetBSD: nanpa.awk,v 1.2 2003/03/13 02:55:01 jhawk Exp $
+# $NetBSD: nanpa.awk,v 1.3 2023/01/28 13:12:16 jmcneill Exp $
 #
 # todo:
-#      parse "http://docs.nanpa.com/cgi-bin/npa_reports/nanpa?
-#          function=list_npa_introduced" to produce parenthetical
-#          notes about what area codes are overlayed by others
-#          (or split from).
+#      parse "https://nationalnanpa.com/nanp1/npa_report.csv"
+#          instead of scraping HTML.
 #
+function trim(s)
+{
+       gsub(/^[ \t]+|[ \t]+$/, "", s);
+       return s;
+}
+function mapinit(postdb)
+{
+       while ((getline < postdb) > 0) {
+               sub(/#.*/, "");
+               if (length($0)==0) continue;
+               NF=split($0, f);
+               location[f[1]] = f[2];
+               flocation[tolower(f[2])] = f[2];
+               country[f[1]] = f[4];
+               fcountry[tolower(f[2])] = f[4];
+       }
+}
+function countrymap(s)
+{
+       if (s == "CA") return "Canada";
+       if (s == "US") return "USA";
+       return s;
+}
+function locationmap(s,        t)
+{
+       if (s in location) {
+               t = location[s];
+               if (s in country) {
+                       t = t " (" countrymap(country[s]) ")";
+               }
+       } else if (tolower(s) in flocation) {
+               t = flocation[tolower(s)];
+               if (tolower(s) in fcountry) {
+                       t = t " (" countrymap(fcountry[tolower(s)]) ")";
+               }
+       } else {
+               t = s;
+       }
+       return t;
+}
 function parse(file, ispipe, isplanning,       i, planinit, t)
 {
        planinit = 0;
@@ -13,30 +51,30 @@
                sub(/#.*/, "");
                if (length($0)==0) continue;
                if (isplanning) {
-                       split($0, f);
-                       if (!planinit && f[2]=="NEW NPA") {
+                       NF=split($0, f);
+                       if (!planinit && f[2]=="New NPA") {
                                planinit=1;
                                for (i=1; i<=NF; i++)
-                                       fnames[$i]=i-1;
-                       } else if (planinit && length(f[fnames["NEW NPA"]])>1) {
-                               t = f[fnames["LOCATION"]] FS;
-                               if (f[fnames["OVERLAY?"]]=="Yes")
-                                 t = t "Overlay of " f[fnames["OLD NPA"]];
-                               else if (f[fnames["OLD NPA"]])
-                                 t = t "Split of " f[fnames["OLD NPA"]];
-                               if (f[fnames["STATUS"]])
-                                       t = t " (" f[fnames["STATUS"]] ")";
-                               if (length(f[fnames["IN SERVICE DATE"]]) > 1)
+                                       fnames[f[i]]=i-1;
+                       } else if (planinit && length(f[fnames["New NPA"]])>1) {
+                               t = locationmap(trim(f[fnames["Location"]])) FS;
+                               if (trim(f[fnames["Overlay?"]])=="Yes")
+                                 t = t "Overlay of " trim(f[fnames["Old NPA"]]);
+                               else if (f[fnames["Old NPA"]])
+                                 t = t "Split of " trim(f[fnames["Old NPA"]]);
+                               if (f[fnames["Status"]])
+                                       t = t " (" trim(f[fnames["Status"]]) ")";
+                               if (length(f[fnames["In Service Date"]]) > 1)
                                        t = t " effective " \
-                                           f[fnames["IN SERVICE DATE"]];
-                               data[f[fnames["NEW NPA"]] "*"] = t;
+                                           trim(f[fnames["In Service Date"]]);
+                               data[trim(f[fnames["New NPA"]]) "*"] = t;
                        }
                } else {
                        # digits only
                        match($0, /^[0-9]/);
                        if (RSTART==0) continue;
                        i=index($0, FS);
-                       data[substr($0, 1, i-1)]=substr($0,i+1);
+                       data[substr($0, 1, i-1)]=locationmap(trim(substr($0,i+1)));
                }
        }
        close(file);
@@ -44,8 +82,9 @@
 
 BEGIN{
        FS=":"
+       mapinit("na.postal");
        print "# $""NetBSD: $";
-       print "# Generated from http://www.nanpa.com/area_codes/index.html";
+       print "# Generated from https://nationalnanpa.com/area_codes/index.html";
        print "# (with local exceptions)";
        print "# ";
        print "# format:";
@@ -54,14 +93,14 @@
        print "#   A * in the Area Code field indicates a future area code."
        print "# ";
        parse("ftp -o - " \
-           "http://docs.nanpa.com/cgi-bin/npa_reports/nanpa\\?" \
-           "function=list_npa_geo_number | sed -f nanpa.sed", 1, 0);
+           "https://nationalnanpa.com/enas/geoAreaCodeNumberReport.do" \
+           " | sed -f nanpa.sed", 1, 0);
        parse("ftp -o - " \
-           "http://docs.nanpa.com/cgi-bin/npa_reports/nanpa\\?" \
-           "function=list_npa_non_geo | sed -f nanpa.sed", 1, 0);
+           "https://nationalnanpa.com/enas/nonGeoNpaServiceReport.do" \
+           " | sed -f nanpa.sed", 1, 0);
        parse("ftp -o - " \
-           "http://docs.nanpa.com/cgi-bin/npa_reports/nanpa\\?" \
-           "function=list_npa_not_in_service | sed -f nanpa.sed", 1, 1);
+           "https://nationalnanpa.com/enas/plannedNpasNotInServiceReport.do" \
+           " | sed -f nanpa.sed", 1, 1);
        parse("na.phone.add", 0, 0);
        sort="sort -n";
        for (i in data)
diff -r 3f5df38f02c2 -r a4cd7cebfd0c share/misc/nanpa.sed
--- a/share/misc/nanpa.sed      Sat Jan 28 12:41:07 2023 +0000
+++ b/share/misc/nanpa.sed      Sat Jan 28 13:12:16 2023 +0000
@@ -1,4 +1,4 @@
-# $NetBSD: nanpa.sed,v 1.2 2006/12/25 18:39:48 wiz Exp $
+# $NetBSD: nanpa.sed,v 1.3 2023/01/28 13:12:16 jmcneill Exp $
 #
 # Parse HTML tables output by 
 #   http://docs.nanpa.com/cgi-bin/npa_reports/nanpa
@@ -34,7 +34,7 @@
 #                              Remove lines not starting with <TR>
 /<[Tt][Rr][^>]*>/!d
 #                              Replace all <TD> with colon
-s/[    ]*<TD[^>]*> */:/g
+s/[    ]*<[Tt][Dd][^>]*> */:/g
 #                              Strip all HTML tags
 s/<[^>]*>//g
 #                              Handle HTML characters
@@ -42,7 +42,9 @@
 #                              Compress spaces/tabs
 s/[    ][      ]*/ /g
 #                              Strip leading colons
-s/^://
+s/://
 #                              Strip leading/trailing whitespace
-s/^ //
+s/ *//
 s/ $//
+#                              Strip HTML comments
+s/^--.*$//



Home | Main Index | Thread Index | Old Index