Commit 8a2df4d8 authored by Guido van Rossum

Catch I/O errors when parsing robots.txt file.

Add version number, printed at startup in non-quiet mode.
parent 7fcb33f5
......@@ -93,6 +93,8 @@ rooturl -- URL to start checking
"""
__version__ = "0.1"
import sys
import os
......@@ -135,7 +137,6 @@ def main():
except getopt.error, msg:
sys.stdout = sys.stderr
print msg
print __doc__ % globals()
sys.exit(2)
for o, a in opts:
if o == '-R':
......@@ -151,6 +152,9 @@ def main():
if o == '-v':
verbose = verbose + 1
if verbose:
print AGENTNAME, "version", __version__
if restart:
if verbose > 0:
print "Loading checkpoint from %s ..." % dumpfile
......@@ -234,13 +238,17 @@ class Checker:
self.addrobot(root)
def addrobot(self, root):
url = urlparse.urljoin(root, "/robots.txt")
self.robots[root] = rp = robotparser.RobotFileParser()
if verbose > 3:
print "Parsing robots.txt file"
if verbose > 2:
print "Parsing", url
rp.debug = 1
url = urlparse.urljoin(root, "/robots.txt")
rp.set_url(url)
rp.read()
try:
rp.read()
except IOError, msg:
if verbose > 1:
print "I/O error parsing", url, ":", msg
def run(self):
while self.todo:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment