Commit 8a2df4d8 authored by Guido van Rossum

Catch I/O errors when parsing robots.txt file.

Add version number, printed at startup in non-quiet mode.
parent 7fcb33f5
...@@ -93,6 +93,8 @@ rooturl -- URL to start checking ...@@ -93,6 +93,8 @@ rooturl -- URL to start checking
""" """
__version__ = "0.1"
import sys import sys
import os import os
...@@ -135,7 +137,6 @@ def main(): ...@@ -135,7 +137,6 @@ def main():
except getopt.error, msg: except getopt.error, msg:
sys.stdout = sys.stderr sys.stdout = sys.stderr
print msg print msg
print __doc__ % globals()
sys.exit(2) sys.exit(2)
for o, a in opts: for o, a in opts:
if o == '-R': if o == '-R':
...@@ -151,6 +152,9 @@ def main(): ...@@ -151,6 +152,9 @@ def main():
if o == '-v': if o == '-v':
verbose = verbose + 1 verbose = verbose + 1
if verbose:
print AGENTNAME, "version", __version__
if restart: if restart:
if verbose > 0: if verbose > 0:
print "Loading checkpoint from %s ..." % dumpfile print "Loading checkpoint from %s ..." % dumpfile
...@@ -234,13 +238,17 @@ class Checker: ...@@ -234,13 +238,17 @@ class Checker:
self.addrobot(root) self.addrobot(root)
def addrobot(self, root): def addrobot(self, root):
url = urlparse.urljoin(root, "/robots.txt")
self.robots[root] = rp = robotparser.RobotFileParser() self.robots[root] = rp = robotparser.RobotFileParser()
if verbose > 3: if verbose > 2:
print "Parsing robots.txt file" print "Parsing", url
rp.debug = 1 rp.debug = 1
url = urlparse.urljoin(root, "/robots.txt")
rp.set_url(url) rp.set_url(url)
try:
rp.read() rp.read()
except IOError, msg:
if verbose > 1:
print "I/O error parsing", url, ":", msg
def run(self): def run(self):
while self.todo: while self.todo:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment