Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
94bf62f9
Commit
94bf62f9
authored
Sep 11, 2000
by
Guido van Rossum
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Deleted ancient file.
parent
b655939d
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
0 additions
and
410 deletions
+0
-410
Lib/urlopen.py
Lib/urlopen.py
+0
-410
No files found.
Lib/urlopen.py
deleted
100755 → 0
View file @
b655939d
# Open an arbitrary URL
#
# See the following document for a tentative description of URLs:
# Uniform Resource Locators Tim Berners-Lee
# INTERNET DRAFT CERN
# IETF URL Working Group 14 July 1993
# draft-ietf-uri-url-01.txt
#
# The object returned by URLopener().open(file) will differ per
# protocol. All you know is that is has methods read(), readline(),
# readlines(), fileno(), close() and info(). The read*(), fileno()
# and close() methods work like those of open files.
# The info() method returns an rfc822.Message object which can be
# used to query various info about the object, if available.
# (rfc822.Message objects are queried with the getheader() method.)
import
socket
import
regex
# This really consists of two pieces:
# (1) a class which handles opening of all sorts of URLs
# (plus assorted utilities etc.)
# (2) a set of functions for parsing URLs
# XXX Should these be separated out into different modules?
# Shortcut for basic usage
_urlopener
=
None
def
urlopen
(
url
):
global
_urlopener
if
not
_urlopener
:
_urlopener
=
URLopener
()
return
_urlopener
.
open
(
url
)
def
urlretrieve
(
url
):
global
_urlopener
if
not
_urlopener
:
_urlopener
=
URLopener
()
return
_urlopener
.
retrieve
(
url
)
def
urlcleanup
():
if
_urlopener
:
_urlopener
.
cleanup
()
# Class to open URLs.
# This is a class rather than just a subroutine because we may need
# more than one set of global protocol-specific options.
ftpcache
=
{}
class
URLopener
:
# Constructor
def
__init__
(
self
):
self
.
addheaders
=
[]
self
.
tempcache
=
{}
self
.
ftpcache
=
ftpcache
# Undocumented feature: you can use a different
# ftp cache by assigning to the .ftpcache member;
# in case you want logically independent URL openers
def
__del__
(
self
):
self
.
close
()
def
close
(
self
):
self
.
cleanup
()
def
cleanup
(
self
):
import
os
for
url
in
self
.
tempcache
.
keys
():
try
:
os
.
unlink
(
self
.
tempcache
[
url
][
0
])
except
os
.
error
:
pass
del
self
.
tempcache
[
url
]
# Add a header to be used by the HTTP interface only
# e.g. u.addheader('Accept', 'sound/basic')
def
addheader
(
self
,
*
args
):
self
.
addheaders
.
append
(
args
)
# External interface
# Use URLopener().open(file) instead of open(file, 'r')
def
open
(
self
,
url
):
type
,
url
=
splittype
(
unwrap
(
url
))
if
not
type
:
type
=
'file'
name
=
'open_'
+
type
if
'-'
in
name
:
import
regsub
name
=
regsub
.
gsub
(
'-'
,
'_'
,
name
)
if
not
hasattr
(
self
,
name
):
raise
IOError
,
(
'url error'
,
'unknown url type'
,
type
)
try
:
return
getattr
(
self
,
name
)(
url
)
except
socket
.
error
,
msg
:
raise
IOError
,
(
'socket error'
,
msg
)
# External interface
# retrieve(url) returns (filename, None) for a local object
# or (tempfilename, headers) for a remote object
def
retrieve
(
self
,
url
):
if
self
.
tempcache
.
has_key
(
url
):
return
self
.
tempcache
[
url
]
url1
=
unwrap
(
url
)
if
self
.
tempcache
.
has_key
(
url1
):
self
.
tempcache
[
url
]
=
self
.
tempcache
[
url1
]
return
self
.
tempcache
[
url1
]
type
,
url1
=
splittype
(
url1
)
if
not
type
or
type
==
'file'
:
try
:
fp
=
self
.
open_local_file
(
url1
)
del
fp
return
splithost
(
url1
)[
1
],
None
except
IOError
,
msg
:
pass
fp
=
self
.
open
(
url
)
headers
=
fp
.
info
()
import
tempfile
tfn
=
tempfile
.
mktemp
()
self
.
tempcache
[
url
]
=
result
=
tfn
,
headers
tfp
=
open
(
tfn
,
'w'
)
bs
=
1024
*
8
block
=
fp
.
read
(
bs
)
while
block
:
tfp
.
write
(
block
)
block
=
fp
.
read
(
bs
)
del
fp
del
tfp
return
result
# Each method named open_<type> knows how to open that type of URL
# Use HTTP protocol
def
open_http
(
self
,
url
):
import
httplib
host
,
selector
=
splithost
(
url
)
h
=
httplib
.
HTTP
(
host
)
h
.
putrequest
(
'GET'
,
selector
)
for
args
in
self
.
addheaders
:
apply
(
h
.
putheader
,
args
)
errcode
,
errmsg
,
headers
=
h
.
getreply
()
if
errcode
==
200
:
return
addinfo
(
h
.
getfile
(),
headers
)
else
:
raise
IOError
,
(
'http error'
,
errcode
,
errmsg
,
headers
)
# Use Gopher protocol
def
open_gopher
(
self
,
url
):
import
gopherlib
host
,
selector
=
splithost
(
url
)
type
,
selector
=
splitgophertype
(
selector
)
selector
,
query
=
splitquery
(
selector
)
if
query
:
fp
=
gopherlib
.
send_query
(
selector
,
query
,
host
)
else
:
fp
=
gopherlib
.
send_selector
(
selector
,
host
)
return
addinfo
(
fp
,
noheaders
())
# Use local file or FTP depending on form of URL
def
open_file
(
self
,
url
):
try
:
return
self
.
open_local_file
(
url
)
except
IOError
:
return
self
.
open_ftp
(
url
)
# Use local file
def
open_local_file
(
self
,
url
):
host
,
file
=
splithost
(
url
)
if
not
host
:
return
addinfo
(
open
(
file
,
'r'
),
noheaders
())
host
,
port
=
splitport
(
host
)
if
not
port
and
socket
.
gethostbyname
(
host
)
in
(
localhost
(),
thishost
()):
return
addinfo
(
open
(
file
,
'r'
),
noheaders
())
raise
IOError
,
(
'local file error'
,
'not on local host'
)
# Use FTP protocol
def
open_ftp
(
self
,
url
):
host
,
file
=
splithost
(
url
)
if
not
host
:
raise
IOError
,
(
'ftp error'
,
'no host given'
)
host
,
port
=
splitport
(
host
)
host
=
socket
.
gethostbyname
(
host
)
if
not
port
:
import
ftplib
port
=
ftplib
.
FTP_PORT
key
=
(
host
,
port
)
try
:
if
not
self
.
ftpcache
.
has_key
(
key
):
self
.
ftpcache
[
key
]
=
ftpwrapper
(
host
,
port
)
return
addinfo
(
self
.
ftpcache
[
key
].
retrfile
(
file
),
noheaders
())
except
ftperrors
(),
msg
:
raise
IOError
,
(
'ftp error'
,
msg
)
# Utility functions
# Return the IP address of the magic hostname 'localhost'
_localhost
=
None
def
localhost
():
global
_localhost
if
not
_localhost
:
_localhost
=
socket
.
gethostbyname
(
'localhost'
)
return
_localhost
# Return the IP address of the current host
_thishost
=
None
def
thishost
():
global
_thishost
if
not
_thishost
:
_thishost
=
socket
.
gethostbyname
(
socket
.
gethostname
())
return
_thishost
# Return the set of errors raised by the FTP class
_ftperrors
=
None
def
ftperrors
():
global
_ftperrors
if
not
_ftperrors
:
import
ftplib
_ftperrors
=
(
ftplib
.
error_reply
,
ftplib
.
error_temp
,
ftplib
.
error_perm
,
ftplib
.
error_proto
)
return
_ftperrors
# Return an empty rfc822.Message object
_noheaders
=
None
def
noheaders
():
global
_noheaders
if
not
_noheaders
:
import
rfc822
_noheaders
=
rfc822
.
Message
(
open
(
'/dev/null'
,
'r'
))
_noheaders
.
fp
.
close
()
# Recycle file descriptor
return
_noheaders
# Utility classes
# Class used by open_ftp() for cache of open FTP connections
class
ftpwrapper
:
def
__init__
(
self
,
host
,
port
):
self
.
host
=
host
self
.
port
=
port
self
.
init
()
def
init
(
self
):
import
ftplib
self
.
ftp
=
ftplib
.
FTP
()
self
.
ftp
.
connect
(
self
.
host
,
self
.
port
)
self
.
ftp
.
login
()
def
retrfile
(
self
,
file
):
import
ftplib
try
:
self
.
ftp
.
voidcmd
(
'TYPE I'
)
except
ftplib
.
all_errors
:
self
.
init
()
self
.
ftp
.
voidcmd
(
'TYPE I'
)
conn
=
None
if
file
:
try
:
cmd
=
'RETR '
+
file
conn
=
self
.
ftp
.
transfercmd
(
cmd
)
except
ftplib
.
error_perm
,
reason
:
if
reason
[:
3
]
!=
'550'
:
raise
IOError
,
(
'ftp error'
,
reason
)
if
not
conn
:
# Try a directory listing
if
file
:
cmd
=
'LIST '
+
file
else
:
cmd
=
'LIST'
conn
=
self
.
ftp
.
transfercmd
(
cmd
)
return
addclosehook
(
conn
.
makefile
(
'r'
),
self
.
ftp
.
voidresp
)
# Base class for addinfo and addclosehook
class
addbase
:
def
__init__
(
self
,
fp
):
self
.
fp
=
fp
self
.
read
=
self
.
fp
.
read
self
.
readline
=
self
.
fp
.
readline
self
.
readlines
=
self
.
fp
.
readlines
self
.
fileno
=
self
.
fp
.
fileno
def
__repr__
(
self
):
return
'<%s at %s whose fp = %s>'
%
(
self
.
__class__
.
__name__
,
`id(self)`
,
`self.fp`
)
def
__del__
(
self
):
self
.
close
()
def
close
(
self
):
self
.
read
=
None
self
.
readline
=
None
self
.
readlines
=
None
self
.
fileno
=
None
self
.
fp
=
None
# Class to add a close hook to an open file
class
addclosehook
(
addbase
):
def
__init__
(
self
,
fp
,
closehook
,
*
hookargs
):
addbase
.
__init__
(
self
,
fp
)
self
.
closehook
=
closehook
self
.
hookargs
=
hookargs
def
close
(
self
):
if
self
.
closehook
:
apply
(
self
.
closehook
,
self
.
hookargs
)
self
.
closehook
=
None
self
.
hookargs
=
None
addbase
.
close
(
self
)
# class to add an info() method to an open file
class
addinfo
(
addbase
):
def
__init__
(
self
,
fp
,
headers
):
addbase
.
__init__
(
self
,
fp
)
self
.
headers
=
headers
def
info
(
self
):
return
self
.
headers
# Utility to combine a URL with a base URL to form a new URL
def
basejoin
(
base
,
url
):
type
,
path
=
splittype
(
url
)
if
type
:
return
url
host
,
path
=
splithost
(
path
)
basetype
,
basepath
=
splittype
(
base
)
basehost
,
basepath
=
splithost
(
basepath
)
basepath
,
basetag
=
splittag
(
basepath
)
basepath
,
basequery
=
splitquery
(
basepath
)
type
=
basetype
or
'file'
if
path
[:
1
]
!=
'/'
:
import
string
i
=
string
.
rfind
(
basepath
,
'/'
)
if
i
<
0
:
basepath
=
'/'
else
:
basepath
=
basepath
[:
i
+
1
]
path
=
basepath
+
path
if
not
host
:
host
=
basehost
if
host
:
return
type
+
'://'
+
host
+
path
else
:
return
type
+
':'
+
path
# Utilities to parse URLs:
# unwrap('<URL:type//host/path>') --> 'type//host/path'
# splittype('type:opaquestring') --> 'type', 'opaquestring'
# splithost('//host[:port]/path') --> 'host[:port]', '/path'
# splitport('host:port') --> 'host', 'port'
# splitquery('/path?query') --> '/path', 'query'
# splittag('/path#tag') --> '/path', 'tag'
# splitgophertype('/Xselector') --> 'X', 'selector'
def
unwrap
(
url
):
import
string
url
=
string
.
strip
(
url
)
if
url
[:
1
]
==
'<'
and
url
[
-
1
:]
==
'>'
:
url
=
string
.
strip
(
url
[
1
:
-
1
])
if
url
[:
4
]
==
'URL:'
:
url
=
string
.
strip
(
url
[
4
:])
return
url
_typeprog
=
regex
.
compile
(
'^
\
([^/:]+
\
):
\
(.*
\
)$'
)
def
splittype
(
url
):
if
_typeprog
.
match
(
url
)
>=
0
:
return
_typeprog
.
group
(
1
,
2
)
return
None
,
url
_hostprog
=
regex
.
compile
(
'^//
\
([^/]+
\
)
\
(.*
\
)$'
)
def
splithost
(
url
):
if
_hostprog
.
match
(
url
)
>=
0
:
return
_hostprog
.
group
(
1
,
2
)
return
None
,
url
_portprog
=
regex
.
compile
(
'^
\
(.*
\
):
\
([
0
-9]+
\
)$
'
)
def splitport(host):
if _portprog.match(host) >= 0: return _portprog.group(1, 2)
return host, None
_queryprog = regex.compile('
^
\
(.
*
\
)
\
?
\
([
^
?
]
*
\
)
$
')
def splitquery(url):
if _queryprog.match(url) >= 0: return _queryprog.group(1, 2)
return url, None
_tagprog = regex.compile('
^
\
(.
*
\
)
#\([^#]*\)$')
def
splittag
(
url
):
if
_tagprog
.
match
(
url
)
>=
0
:
return
_tagprog
.
group
(
1
,
2
)
return
url
,
None
def
splitgophertype
(
selector
):
if
selector
[:
1
]
==
'/'
and
selector
[
1
:
2
]:
return
selector
[
1
],
selector
[
2
:]
return
None
,
selector
# Test program
def
test
():
import
sys
import
regsub
args
=
sys
.
argv
[
1
:]
if
not
args
:
args
=
[
'/etc/passwd'
,
'file:/etc/passwd'
,
'file://localhost/etc/passwd'
,
'ftp://ftp.cwi.nl/etc/passwd'
,
'gopher://gopher.cwi.nl/11/'
,
'http://www.cwi.nl/index.html'
,
]
try
:
for
url
in
args
:
print
'-'
*
10
,
url
,
'-'
*
10
fn
,
h
=
urlretrieve
(
url
)
print
fn
,
h
if
h
:
print
'======'
for
k
in
h
.
keys
():
print
k
+
':'
,
h
[
k
]
print
'======'
fp
=
open
(
fn
,
'r'
)
data
=
fp
.
read
()
del
fp
print
regsub
.
gsub
(
'
\
r
'
,
''
,
data
)
fn
,
h
=
None
,
None
print
'-'
*
40
finally
:
urlcleanup
()
# Run test program when run as a script
if
__name__
==
'__main__'
:
test
()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment