Added the 7 new top level domains, and reworded the nameorgs output.

Not sure this is better in all cases.

parse(): Fixed a bug in the output; the dict is referred to in the code as `countries' not `country'. Also added no-case-fold for the string "U.S." since the Virgin Islands name no longer wraps those in parentheses.

main(): Fixed the argument parsing to agree with the docstring, i.e. --outputdict instead of --output.

In the module docstring:
- updated my email address
- we don't need to explain about Python 1.5 regexps <wink>

We also don't need to wrap the import of re with a try/except.

Other style fixes:
- untabification
- revert back to <> style everywhere (and consistently)

Added the 7 new top level domains, and reworded the nameorgs output.
Not sure this is better in all cases.

parse(): Fixed a bug in the output; the dict is referred to in the code as `countries' not `country'. Also added no-case-fold for the string "U.S." since the Virgin Islands name no longer wraps those in parentheses.

main(): Fixed the argument parsing to agree with the docstring, i.e. --outputdict instead of --output.

In the module docstring:
- updated my email address
- we don't need to explain about Python 1.5 regexps <wink>

We also don't need to wrap the import of re with a try/except.

Other style fixes:
- untabification
- revert back to <> style everywhere (and consistently)
ba3d6162 · Barry Warsaw · 25c97029 · ba3d6162
Commit ba3d6162 authored Jun 07, 2002 by Barry Warsaw
Show whitespace changes
Inline Side-by-side

Showing with 110 additions and 113 deletions

Tools/world/world Tools/world/world +110 -113

No files found.
--- a/Tools/world/world
+++ b/Tools/world/world
@@ -3,7 +3,7 @@
 """world -- Print mappings between country names and DNS country codes.

 Contact: Barry Warsaw
-Email:   bwarsaw@python.org
+Email:   barry@python.org
 Version: %(__version__)s

 This script will take a list of Internet addresses and print out where in the
@@ -14,9 +14,9 @@ code found in the address.  Addresses can be in any of the following forms:
    host.domain.xx    -- any Internet host or network name
    somebody@where.xx -- an Internet email address

-If no match is found, the address is interpreted as a regular expression [*]
-and a reverse lookup is attempted.  This script will search the country names
-and print a list of matching entries.  You can force reverse mappings with the
+If no match is found, the address is interpreted as a regular expression and a
+reverse lookup is attempted.  This script will search the country names and
+print a list of matching entries.  You can force reverse mappings with the
 `-r' flag (see below).

 For example:
@@ -34,10 +34,6 @@ For example:
        tz: Tanzania, United Republic of
        gb: United Kingdom

-
- [*] Note that regular expressions must conform to Python 1.5's re.py module
- syntax.  The comparison is done with the search() method.
-
 Country codes are maintained by the RIPE Network Coordination Centre,
 in coordination with the ISO 3166 Maintenance Agency at DIN Berlin.  The
 authoritative source of country code mappings is:
@@ -69,7 +65,7 @@ Usage: %(PROGRAM)s [-d] [-p file] [-o] [-h] addr [addr ...]
        When used in conjunction with the `-p' option, output is in the form
        of a Python dictionary, and country names are normalized
        w.r.t. capitalization.  This makes it appropriate for cutting and
-        pasting back into this file.
+        pasting back into this file.  Output is always to standard out.

    --reverse
    -r
@@ -82,18 +78,13 @@ Usage: %(PROGRAM)s [-d] [-p file] [-o] [-h] addr [addr ...]
    -h
    --help
        Print this message.
-
 """
 __version__ = '$Revision$'


 import sys
 import getopt
-try:
-    import re
-except ImportError:
-    print sys.argv[0], 'requires Python 1.5'
-    sys.exit(1)
+import re

 PROGRAM = sys.argv[0]

@@ -114,11 +105,7 @@ def resolve(rawaddr):
        return rawaddr
    addr = parts[-1]
    if nameorgs.has_key(addr):
-        if nameorgs[addr][0].lower() in 'aeiou':
-            ana = 'an'
-        else:
-            ana = 'a'
-	print rawaddr, 'is from', ana, nameorgs[addr], 'organization'
+        print rawaddr, 'is in the', nameorgs[addr], 'top level domain'
        return None
    elif countries.has_key(addr):
        print rawaddr, 'originated from', countries[addr]
@@ -161,7 +148,7 @@ def parse(file, normalize):
    scanning = 0

    if normalize:
-	print 'country = {'
+        print 'countries = {'

    while 1:
        line = fp.readline()
@@ -186,16 +173,17 @@ def parse(file, normalize):
                    # XXX special cases
                    if w in ('AND', 'OF', 'OF)', 'name:', 'METROPOLITAN'):
                        words[i] = w.lower()
-		    elif w == 'THE' and i != 1:
+                    elif w == 'THE' and i <> 1:
                        words[i] = w.lower()
                    elif len(w) > 3 and w[1] == "'":
                        words[i] = w[0:3].upper() + w[3:].lower()
-		    elif w == '(U.S.)':
+                    elif w in ('(U.S.)', 'U.S.'):
                        pass
-		    elif w[0] == '(' and w != '(local':
+                    elif w[0] == '(' and w <> '(local':
                        words[i] = '(' + w[1:].capitalize()
-		    elif w.find('-') != -1:
-			words[i] = '-'.join([s.capitalize() for s in w.split('-')])
+                    elif w.find('-') <> -1:
+                        words[i] = '-'.join(
+                            [s.capitalize() for s in w.split('-')])
                    else:
                        words[i] = w.capitalize()
                code = code.lower()
@@ -234,7 +222,7 @@ def main():
            dump = 1
        elif opt in ('-p', '--parse'):
            parsefile = arg
-	elif opt in ('-o', '--output'):
+        elif opt in ('-o', '--outputdict'):
            normalize = 1
        elif opt in ('-r', '--reverse'):
            forcerev = 1
@@ -267,14 +255,23 @@ def main():

 # The mappings
 nameorgs = {
+    # New top level domains as described by ICANN
+    # http://www.icann.org/tlds/
+    "aero": "air-transport industry",
    "arpa": "Arpanet",
+    "biz": "business",
    "com": "commercial",
+    "coop": "cooperatives",
    "edu": "educational",
    "gov": "government",
+    "info": "unrestricted `info'",
+    "int": "international",
    "mil": "military",
+    "museum": "museums",
+    "name": "`name' (for registration by individuals)",
    "net": "networking",
    "org": "non-commercial",
-    "int": "international",
+    "pro": "professionals",
    # This isn't in the same class as those above, but is included here
    # because `uk' is the common practice country code for the United Kingdom.
    # AFAICT, the official `gb' code is routinely ignored!
@@ -525,7 +522,7 @@ countries = {
    "ve": "Venezuela",
    "vn": "Viet Nam",
    "vg": "Virgin Islands, British",
-    "vi": "Virgin Islands, U.s.",
+    "vi": "Virgin Islands, U.S.",
    "wf": "Wallis and Futuna",
    "eh": "Western Sahara",
    "ye": "Yemen",