Commit 67e22c26 authored by Guido van Rossum

use caching of temp files; added cleanup

parent 5da5755c
...@@ -37,6 +37,9 @@ def urlretrieve(url): ...@@ -37,6 +37,9 @@ def urlretrieve(url):
if not _urlopener: if not _urlopener:
_urlopener = URLopener() _urlopener = URLopener()
return _urlopener.retrieve(url) return _urlopener.retrieve(url)
def urlcleanup():
    # Ask the shared module-level opener, if there is one, to run its
    # cleanup().  When no opener is set there is nothing to do.
    opener = _urlopener
    if not opener:
        return
    opener.cleanup()
# Class to open URLs. # Class to open URLs.
...@@ -48,7 +51,7 @@ class URLopener: ...@@ -48,7 +51,7 @@ class URLopener:
# Constructor # Constructor
def __init__(self): def __init__(self):
self.addheaders = [] self.addheaders = []
self.tempfiles = [] self.tempcache = {}
self.ftpcache = ftpcache self.ftpcache = ftpcache
# Undocumented feature: you can use a different # Undocumented feature: you can use a different
# ftp cache by assigning to the .ftpcache member; # ftp cache by assigning to the .ftpcache member;
...@@ -62,11 +65,12 @@ class URLopener: ...@@ -62,11 +65,12 @@ class URLopener:
def cleanup(self):
    # Delete every temporary file recorded in self.tempcache and
    # empty the cache.  Each cache value is a sequence whose first
    # item is the temp file's name.
    import os
    # Iterate over a snapshot of the keys: entries are deleted inside
    # the loop, and mutating a dictionary while walking a live view of
    # its keys raises RuntimeError.
    for url in list(self.tempcache.keys()):
        try:
            os.unlink(self.tempcache[url][0])
        except os.error:
            # Best effort: the file may already be gone, e.g. when two
            # cache keys refer to the same temp file.
            pass
        del self.tempcache[url]
# Add a header to be used by the HTTP interface only # Add a header to be used by the HTTP interface only
# e.g. u.addheader('Accept', 'sound/basic') # e.g. u.addheader('Accept', 'sound/basic')
...@@ -93,7 +97,13 @@ class URLopener: ...@@ -93,7 +97,13 @@ class URLopener:
# retrieve(url) returns (filename, None) for a local object # retrieve(url) returns (filename, None) for a local object
# or (tempfilename, headers) for a remote object # or (tempfilename, headers) for a remote object
def retrieve(self, url): def retrieve(self, url):
type, url1 = splittype(unwrap(url)) if self.tempcache.has_key(url):
return self.tempcache[url]
url1 = unwrap(url)
if self.tempcache.has_key(url1):
self.tempcache[url] = self.tempcache[url1]
return self.tempcache[url1]
type, url1 = splittype(url1)
if not type or type == 'file': if not type or type == 'file':
try: try:
fp = self.open_local_file(url1) fp = self.open_local_file(url1)
...@@ -102,19 +112,19 @@ class URLopener: ...@@ -102,19 +112,19 @@ class URLopener:
except IOError, msg: except IOError, msg:
pass pass
fp = self.open(url) fp = self.open(url)
headers = fp.info()
import tempfile import tempfile
tfn = tempfile.mktemp() tfn = tempfile.mktemp()
self.tempfiles.append(tfn) self.tempcache[url] = result = tfn, headers
tfp = open(tfn, 'w') tfp = open(tfn, 'w')
bs = 1024*8 bs = 1024*8
block = fp.read(bs) block = fp.read(bs)
while block: while block:
tfp.write(block) tfp.write(block)
block = fp.read(bs) block = fp.read(bs)
headers = fp.info()
del fp del fp
del tfp del tfp
return tfn, headers return result
# Each method named open_<type> knows how to open that type of URL # Each method named open_<type> knows how to open that type of URL
...@@ -370,7 +380,7 @@ def test(): ...@@ -370,7 +380,7 @@ def test():
fn, h = None, None fn, h = None, None
print '-'*40 print '-'*40
finally: finally:
_urlopener.cleanup() urlcleanup()
# Run test program when run as a script # Run test program when run as a script
if __name__ == '__main__': if __name__ == '__main__':
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment