Commit ba3d6162 authored by Barry Warsaw

Added the 7 new top level domains, and reworded the nameorgs output.

Not sure this is better in all cases.

parse(): Fixed a bug in the output; the dict is referred to in the
code as `countries' not `country'.  Also added no-case-fold for the
string "U.S." since the Virgin Islands name no longer wraps those in
parentheses.

main(): Fixed the argument parsing to agree with the docstring, i.e.
--outputdict instead of --output.

In the module docstring:

- updated my email address
- we don't need to explain about Python 1.5 regexps <wink>

We also don't need to wrap the import of re with a try/except.

Other style fixes:

- untabification
- revert back to <> style everywhere (and consistently)
parent 25c97029
......@@ -3,7 +3,7 @@
"""world -- Print mappings between country names and DNS country codes.
Contact: Barry Warsaw
Email: bwarsaw@python.org
Email: barry@python.org
Version: %(__version__)s
This script will take a list of Internet addresses and print out where in the
......@@ -14,9 +14,9 @@ code found in the address. Addresses can be in any of the following forms:
host.domain.xx -- any Internet host or network name
somebody@where.xx -- an Internet email address
If no match is found, the address is interpreted as a regular expression [*]
and a reverse lookup is attempted. This script will search the country names
and print a list of matching entries. You can force reverse mappings with the
If no match is found, the address is interpreted as a regular expression and a
reverse lookup is attempted. This script will search the country names and
print a list of matching entries. You can force reverse mappings with the
`-r' flag (see below).
For example:
......@@ -34,10 +34,6 @@ For example:
tz: Tanzania, United Republic of
gb: United Kingdom
[*] Note that regular expressions must conform to Python 1.5's re.py module
syntax. The comparison is done with the search() method.
Country codes are maintained by the RIPE Network Coordination Centre,
in coordination with the ISO 3166 Maintenance Agency at DIN Berlin. The
authoritative source of country code mappings is:
......@@ -69,7 +65,7 @@ Usage: %(PROGRAM)s [-d] [-p file] [-o] [-h] addr [addr ...]
When used in conjunction with the `-p' option, output is in the form
of a Python dictionary, and country names are normalized
w.r.t. capitalization. This makes it appropriate for cutting and
pasting back into this file.
pasting back into this file. Output is always to standard out.
--reverse
-r
......@@ -82,18 +78,13 @@ Usage: %(PROGRAM)s [-d] [-p file] [-o] [-h] addr [addr ...]
-h
--help
Print this message.
"""
__version__ = '$Revision$'
import sys
import getopt
try:
import re
except ImportError:
print sys.argv[0], 'requires Python 1.5'
sys.exit(1)
import re
PROGRAM = sys.argv[0]
......@@ -114,11 +105,7 @@ def resolve(rawaddr):
return rawaddr
addr = parts[-1]
if nameorgs.has_key(addr):
if nameorgs[addr][0].lower() in 'aeiou':
ana = 'an'
else:
ana = 'a'
print rawaddr, 'is from', ana, nameorgs[addr], 'organization'
print rawaddr, 'is in the', nameorgs[addr], 'top level domain'
return None
elif countries.has_key(addr):
print rawaddr, 'originated from', countries[addr]
......@@ -161,7 +148,7 @@ def parse(file, normalize):
scanning = 0
if normalize:
print 'country = {'
print 'countries = {'
while 1:
line = fp.readline()
......@@ -186,16 +173,17 @@ def parse(file, normalize):
# XXX special cases
if w in ('AND', 'OF', 'OF)', 'name:', 'METROPOLITAN'):
words[i] = w.lower()
elif w == 'THE' and i != 1:
elif w == 'THE' and i <> 1:
words[i] = w.lower()
elif len(w) > 3 and w[1] == "'":
words[i] = w[0:3].upper() + w[3:].lower()
elif w == '(U.S.)':
elif w in ('(U.S.)', 'U.S.'):
pass
elif w[0] == '(' and w != '(local':
elif w[0] == '(' and w <> '(local':
words[i] = '(' + w[1:].capitalize()
elif w.find('-') != -1:
words[i] = '-'.join([s.capitalize() for s in w.split('-')])
elif w.find('-') <> -1:
words[i] = '-'.join(
[s.capitalize() for s in w.split('-')])
else:
words[i] = w.capitalize()
code = code.lower()
......@@ -234,7 +222,7 @@ def main():
dump = 1
elif opt in ('-p', '--parse'):
parsefile = arg
elif opt in ('-o', '--output'):
elif opt in ('-o', '--outputdict'):
normalize = 1
elif opt in ('-r', '--reverse'):
forcerev = 1
......@@ -267,14 +255,23 @@ def main():
# The mappings
nameorgs = {
# New top level domains as described by ICANN
# http://www.icann.org/tlds/
"aero": "air-transport industry",
"arpa": "Arpanet",
"biz": "business",
"com": "commercial",
"coop": "cooperatives",
"edu": "educational",
"gov": "government",
"info": "unrestricted `info'",
"int": "international",
"mil": "military",
"museum": "museums",
"name": "`name' (for registration by individuals)",
"net": "networking",
"org": "non-commercial",
"int": "international",
"pro": "professionals",
# This isn't in the same class as those above, but is included here
# because `uk' is the common practice country code for the United Kingdom.
# AFAICT, the official `gb' code is routinely ignored!
......@@ -525,7 +522,7 @@ countries = {
"ve": "Venezuela",
"vn": "Viet Nam",
"vg": "Virgin Islands, British",
"vi": "Virgin Islands, U.s.",
"vi": "Virgin Islands, U.S.",
"wf": "Wallis and Futuna",
"eh": "Western Sahara",
"ye": "Yemen",
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment