Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
749057be
Commit
749057be
authored
Feb 22, 1994
by
Guido van Rossum
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Redesigned as a class
parent
76ca3c17
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
193 additions
and
114 deletions
+193
-114
Lib/urlopen.py
Lib/urlopen.py
+193
-114
No files found.
Lib/urlopen.py
View file @
749057be
...
...
@@ -6,110 +6,170 @@
# IETF URL Working Group 14 July 1993
# draft-ietf-uri-url-01.txt
#
# The object returned by urlopen() will differ per protocol.
# All you know is that is has methods read(), fileno(), close() and info().
# The read(), fileno() and close() methods work like those of open files.
# The object returned by URLopener().open(file) will differ per
# protocol. All you know is that is has methods read(), readline(),
# readlines(), fileno(), close() and info(). The read*(), fileno()
# and close() methods work like those of open files.
# The info() method returns an rfc822.Message object which can be
# used to query various info about the object, if available.
# (rfc822.Message objects are queried with the getheader() method.)
import
socket
import
regex
import
regsub
import
string
import
rfc822
import
ftplib
# External interface -- use urlopen(file) as if it were open(file, 'r')
# This really consists of two pieces:
# (1) a class which handles opening of all sorts of URLs
# (plus assorted utilities etc.)
# (2) a set of functions for parsing URLs
# XXX Should these be separated out into different modules?
# Shortcut for basic usage
_urlopener
=
None
def
urlopen
(
url
):
url
=
string
.
strip
(
url
)
if
url
[:
1
]
==
'<'
and
url
[
-
1
:]
==
'>'
:
url
=
string
.
strip
(
url
[
1
:
-
1
])
if
url
[:
4
]
==
'URL:'
:
url
=
string
.
strip
(
url
[
4
:])
type
,
url
=
splittype
(
url
)
if
not
type
:
type
=
'file'
type
=
regsub
.
gsub
(
'-'
,
'_'
,
type
)
try
:
func
=
eval
(
'open_'
+
type
)
except
NameError
:
raise
IOError
,
(
'url error'
,
'unknown url type'
,
type
)
try
:
return
func
(
url
)
except
socket
.
error
,
msg
:
raise
IOError
,
(
'socket error'
,
msg
)
# Each routine of the form open_<type> knows how to open that type of URL
# Use HTTP protocol
def
open_http
(
url
):
import
httplib
host
,
selector
=
splithost
(
url
)
h
=
httplib
.
HTTP
(
host
)
h
.
putrequest
(
'GET'
,
selector
)
errcode
,
errmsg
,
headers
=
h
.
getreply
()
if
errcode
==
200
:
return
makefile
(
h
.
getfile
(),
headers
)
else
:
raise
IOError
,
(
'http error'
,
errcode
,
errmsg
,
headers
)
# Empty rfc822.Message object
noheaders
=
rfc822
.
Message
(
open
(
'/dev/null'
,
'r'
))
noheaders
.
fp
.
close
()
# Recycle file descriptor
# Use Gopher protocol
def
open_gopher
(
url
):
import
gopherlib
host
,
selector
=
splithost
(
url
)
type
,
selector
=
splitgophertype
(
selector
)
selector
,
query
=
splitquery
(
selector
)
if
query
:
fp
=
gopherlib
.
send_query
(
selector
,
query
,
host
)
else
:
fp
=
gopherlib
.
send_selector
(
selector
,
host
)
return
makefile
(
fp
,
noheaders
)
# Use local file or FTP depending on form of URL
localhost
=
socket
.
gethostbyname
(
'localhost'
)
thishost
=
socket
.
gethostbyname
(
socket
.
gethostname
())
def
open_file
(
url
):
host
,
file
=
splithost
(
url
)
if
not
host
:
return
makefile
(
open
(
file
,
'r'
),
noheaders
)
host
,
port
=
splitport
(
host
)
if
not
port
and
socket
.
gethostbyname
(
host
)
in
(
localhost
,
thishost
):
try
:
fp
=
open
(
file
,
'r'
)
except
IOError
:
fp
=
None
if
fp
:
return
makefile
(
fp
,
noheaders
)
return
open_ftp
(
url
)
# Use FTP protocol
global
_urlopener
if
not
_urlopener
:
_urlopener
=
URLopener
()
return
_urlopener
.
open
(
url
)
# Class to open URLs.
# This is a class rather than just a subroutine because we may need
# more than one set of global protocol-specific options.
ftpcache
=
{}
ftperrors
=
(
ftplib
.
error_reply
,
ftplib
.
error_temp
,
ftplib
.
error_perm
,
ftplib
.
error_proto
)
def
open_ftp
(
url
):
host
,
file
=
splithost
(
url
)
host
,
port
=
splitport
(
host
)
host
=
socket
.
gethostbyname
(
host
)
if
not
port
:
port
=
ftplib
.
FTP_PORT
key
=
(
host
,
port
)
try
:
if
not
ftpcache
.
has_key
(
key
):
ftpcache
[
key
]
=
ftpwrapper
(
host
,
port
)
return
makefile
(
ftpcache
[
key
].
retrfile
(
file
),
noheaders
)
except
ftperrors
,
msg
:
raise
IOError
,
(
'ftp error'
,
msg
)
class
URLopener
:
# Constructor
def
__init__
(
self
):
self
.
addheaders
=
[]
self
.
ftpcache
=
ftpcache
# Undocumented feature: you can use a different
# ftp cache by assigning to the .ftpcache member;
# in case you want logically independent URL openers
# Utility classes
# Add a header to be used by the HTTP interface only
# e.g. u.addheader('Accept', 'sound/basic')
def
addheader
(
self
,
*
args
):
self
.
addheaders
.
append
(
args
)
# Class used to add an info() method to a file object
class
makefile
:
def
__init__
(
self
,
fp
,
headers
):
self
.
fp
=
fp
self
.
headers
=
headers
self
.
read
=
self
.
fp
.
read
self
.
fileno
=
self
.
fp
.
fileno
self
.
close
=
self
.
fp
.
close
def
info
(
self
):
return
self
.
headers
# External interface
# Use URLopener().open(file) instead of open(file, 'r')
def
open
(
self
,
url
):
import
string
url
=
string
.
strip
(
url
)
if
url
[:
1
]
==
'<'
and
url
[
-
1
:]
==
'>'
:
url
=
string
.
strip
(
url
[
1
:
-
1
])
if
url
[:
4
]
==
'URL:'
:
url
=
string
.
strip
(
url
[
4
:])
type
,
url
=
splittype
(
url
)
if
not
type
:
type
=
'file'
name
=
'open_'
+
type
if
'-'
in
name
:
import
regsub
name
=
regsub
.
gsub
(
'-'
,
'_'
,
name
)
if
not
hasattr
(
self
,
name
):
raise
IOError
,
(
'url error'
,
'unknown url type'
,
type
)
meth
=
getattr
(
self
,
name
)
try
:
return
meth
(
url
)
except
socket
.
error
,
msg
:
raise
IOError
,
(
'socket error'
,
msg
)
# Each method named open_<type> knows how to open that type of URL
# Use HTTP protocol
def
open_http
(
self
,
url
):
import
httplib
host
,
selector
=
splithost
(
url
)
h
=
httplib
.
HTTP
(
host
)
h
.
putrequest
(
'GET'
,
selector
)
for
args
in
self
.
addheaders
:
apply
(
h
.
putheader
,
args
)
errcode
,
errmsg
,
headers
=
h
.
getreply
()
if
errcode
==
200
:
return
addinfo
(
h
.
getfile
(),
headers
)
else
:
raise
IOError
,
(
'http error'
,
errcode
,
errmsg
,
headers
)
# Use Gopher protocol
def
open_gopher
(
self
,
url
):
import
gopherlib
host
,
selector
=
splithost
(
url
)
type
,
selector
=
splitgophertype
(
selector
)
selector
,
query
=
splitquery
(
selector
)
if
query
:
fp
=
gopherlib
.
send_query
(
selector
,
query
,
host
)
else
:
fp
=
gopherlib
.
send_selector
(
selector
,
host
)
return
addinfo
(
fp
,
noheaders
())
# Use local file or FTP depending on form of URL
def
open_file
(
self
,
url
):
host
,
file
=
splithost
(
url
)
if
not
host
:
return
addinfo
(
open
(
file
,
'r'
),
noheaders
())
host
,
port
=
splitport
(
host
)
if
not
port
and
socket
.
gethostbyname
(
host
)
in
(
localhost
(),
thishost
()):
try
:
fp
=
open
(
file
,
'r'
)
except
IOError
:
fp
=
None
if
fp
:
return
addinfo
(
fp
,
noheaders
())
return
self
.
open_ftp
(
url
)
# Use FTP protocol
def
open_ftp
(
self
,
url
):
host
,
file
=
splithost
(
url
)
host
,
port
=
splitport
(
host
)
host
=
socket
.
gethostbyname
(
host
)
if
not
port
:
import
ftplib
port
=
ftplib
.
FTP_PORT
key
=
(
host
,
port
)
try
:
if
not
self
.
ftpcache
.
has_key
(
key
):
self
.
ftpcache
[
key
]
=
ftpwrapper
(
host
,
port
)
return
addinfo
(
self
.
ftpcache
[
key
].
retrfile
(
file
),
noheaders
())
except
ftperrors
(),
msg
:
raise
IOError
,
(
'ftp error'
,
msg
)
# Utility functions
# Return the IP address of the magic hostname 'localhost'
_localhost
=
None
def
localhost
():
global
_localhost
if
not
_localhost
:
_localhost
=
socket
.
gethostbyname
(
'localhost'
)
return
_localhost
# Return the IP address of the current host
_thishost
=
None
def
thishost
():
global
_thishost
if
not
_thishost
:
_thishost
=
socket
.
gethostbyname
(
socket
.
gethostname
())
return
_thishost
# Return the set of errors raised by the FTP class
_ftperrors
=
None
def
ftperrors
():
global
_ftperrors
if
not
_ftperrors
:
import
ftplib
_ftperrors
=
(
ftplib
.
error_reply
,
ftplib
.
error_temp
,
ftplib
.
error_perm
,
ftplib
.
error_proto
)
return
_ftperrors
# Return an empty rfc822.Message object
_noheaders
=
None
def
noheaders
():
global
_noheaders
if
not
_noheaders
:
import
rfc822
_noheaders
=
rfc822
.
Message
(
open
(
'/dev/null'
,
'r'
))
_noheaders
.
fp
.
close
()
# Recycle file descriptor
return
_noheaders
# Utility classes
# Class used by open_ftp() for cache of open FTP connections
class
ftpwrapper
:
...
...
@@ -118,10 +178,12 @@ class ftpwrapper:
self
.
port
=
port
self
.
init
()
def
init
(
self
):
import
ftplib
self
.
ftp
=
ftplib
.
FTP
()
self
.
ftp
.
connect
(
self
.
host
,
self
.
port
)
self
.
ftp
.
login
()
def
retrfile
(
self
,
file
):
import
ftplib
try
:
self
.
ftp
.
voidcmd
(
'TYPE I'
)
except
ftplib
.
all_errors
:
...
...
@@ -140,27 +202,43 @@ class ftpwrapper:
if
file
:
cmd
=
'NLST '
+
file
else
:
cmd
=
'NLST'
conn
=
self
.
ftp
.
transfercmd
(
cmd
)
return
fakefile
(
self
.
ftp
,
conn
)
# Class used by ftpwrapper to handle response when transfer is complete
class
fakefile
:
def
__init__
(
self
,
ftp
,
conn
):
self
.
ftp
=
ftp
self
.
conn
=
conn
self
.
fp
=
self
.
conn
.
makefile
(
'r'
)
return
addclosehook
(
conn
.
makefile
(
'r'
),
self
.
ftp
.
voidresp
)
# Base class for addinfo and addclosehook
class
addbase
:
def
__init__
(
self
,
fp
):
self
.
fp
=
fp
self
.
read
=
self
.
fp
.
read
self
.
readline
=
self
.
fp
.
readline
self
.
readlines
=
self
.
fp
.
readlines
self
.
fileno
=
self
.
fp
.
fileno
def
__del__
(
self
):
self
.
close
()
def
close
(
self
):
self
.
conn
=
None
self
.
fp
=
None
self
.
read
=
None
if
self
.
ftp
:
self
.
ftp
.
voidresp
()
self
.
ftp
=
None
# Class to add a close hook to an open file
class
addclosehook
(
addbase
):
def
__init__
(
self
,
fp
,
closehook
,
*
hookargs
):
addbase
.
__init__
(
self
,
fp
)
self
.
closehook
=
closehook
self
.
hookargs
=
hookargs
def
close
(
self
):
if
self
.
closehook
:
apply
(
self
.
closehook
,
self
.
hookargs
)
self
.
closehook
=
None
self
.
fp
=
None
# class to add an info() method to an open file
class
addinfo
(
addbase
):
def
__init__
(
self
,
fp
,
headers
):
addbase
.
__init__
(
self
,
fp
)
self
.
headers
=
headers
def
info
(
self
):
return
self
.
headers
# Utilities to
split url parts into component
s:
# Utilities to
parse URL
s:
# splittype('type:opaquestring') --> 'type', 'opaquestring'
# splithost('//host[:port]/path') --> 'host[:port]', '/path'
# splitport('host:port') --> 'host', 'port'
...
...
@@ -168,29 +246,29 @@ class fakefile:
# splittag('/path#tag') --> '/path', 'tag'
# splitgophertype('/Xselector') --> 'X', 'selector'
typeprog
=
regex
.
compile
(
'^
\
([^/:]+
\
):
\
(.*
\
)$'
)
_
typeprog
=
regex
.
compile
(
'^
\
([^/:]+
\
):
\
(.*
\
)$'
)
def
splittype
(
url
):
if
typeprog
.
match
(
url
)
>=
0
:
return
typeprog
.
group
(
1
,
2
)
if
_typeprog
.
match
(
url
)
>=
0
:
return
_
typeprog
.
group
(
1
,
2
)
return
None
,
url
hostprog
=
regex
.
compile
(
'^//
\
([^/]+
\
)
\
(.*
\
)$'
)
_
hostprog
=
regex
.
compile
(
'^//
\
([^/]+
\
)
\
(.*
\
)$'
)
def
splithost
(
url
):
if
hostprog
.
match
(
url
)
>=
0
:
return
hostprog
.
group
(
1
,
2
)
if
_hostprog
.
match
(
url
)
>=
0
:
return
_
hostprog
.
group
(
1
,
2
)
return
None
,
url
portprog
=
regex
.
compile
(
'^
\
(.*
\
):
\
([
0
-9]+
\
)$
'
)
_
portprog
=
regex
.
compile
(
'^
\
(.*
\
):
\
([
0
-9]+
\
)$
'
)
def splitport(host):
if
portprog.match(host) >= 0: return
portprog.group(1, 2)
if
_portprog.match(host) >= 0: return _
portprog.group(1, 2)
return host, None
queryprog = regex.compile('
^
\
(.
*
\
)
\
?
\
([
^
?
]
*
\
)
$
')
_
queryprog = regex.compile('
^
\
(.
*
\
)
\
?
\
([
^
?
]
*
\
)
$
')
def splitquery(url):
if
queryprog.match(url) >= 0: return
queryprog.group(1, 2)
if
_queryprog.match(url) >= 0: return _
queryprog.group(1, 2)
return url, None
tagprog = regex.compile('
^
\
(.
*
\
)
#\([^#]*\)$')
_
tagprog = regex.compile('
^
\
(.
*
\
)
#\([^#]*\)$')
def
splittag
(
url
):
if
tagprog
.
match
(
url
)
>=
0
:
return
tagprog
.
group
(
1
,
2
)
if
_tagprog
.
match
(
url
)
>=
0
:
return
_
tagprog
.
group
(
1
,
2
)
return
url
,
None
def
splitgophertype
(
selector
):
...
...
@@ -202,6 +280,7 @@ def splitgophertype(selector):
# Test program
def
test
():
import
sys
import
regsub
args
=
sys
.
argv
[
1
:]
if
not
args
:
args
=
[
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment