Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
749057be
Commit
749057be
authored
Feb 22, 1994
by
Guido van Rossum
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Redesigned as a class
parent
76ca3c17
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
193 additions
and
114 deletions
+193
-114
Lib/urlopen.py
Lib/urlopen.py
+193
-114
No files found.
Lib/urlopen.py
View file @
749057be
...
@@ -6,110 +6,170 @@
...
@@ -6,110 +6,170 @@
# IETF URL Working Group 14 July 1993
# IETF URL Working Group 14 July 1993
# draft-ietf-uri-url-01.txt
# draft-ietf-uri-url-01.txt
#
#
# The object returned by urlopen() will differ per protocol.
# The object returned by URLopener().open(file) will differ per
# All you know is that is has methods read(), fileno(), close() and info().
# protocol. All you know is that is has methods read(), readline(),
# The read(), fileno() and close() methods work like those of open files.
# readlines(), fileno(), close() and info(). The read*(), fileno()
# and close() methods work like those of open files.
# The info() method returns an rfc822.Message object which can be
# The info() method returns an rfc822.Message object which can be
# used to query various info about the object, if available.
# used to query various info about the object, if available.
# (rfc822.Message objects are queried with the getheader() method.)
# (rfc822.Message objects are queried with the getheader() method.)
import
socket
import
socket
import
regex
import
regex
import
regsub
import
string
import
rfc822
import
ftplib
# External interface -- use urlopen(file) as if it were open(file, 'r')
# This really consists of two pieces:
# (1) a class which handles opening of all sorts of URLs
# (plus assorted utilities etc.)
# (2) a set of functions for parsing URLs
# XXX Should these be separated out into different modules?
# Shortcut for basic usage
_urlopener
=
None
def
urlopen
(
url
):
def
urlopen
(
url
):
global
_urlopener
if
not
_urlopener
:
_urlopener
=
URLopener
()
return
_urlopener
.
open
(
url
)
# Class to open URLs.
# This is a class rather than just a subroutine because we may need
# more than one set of global protocol-specific options.
ftpcache
=
{}
class
URLopener
:
# Constructor
def
__init__
(
self
):
self
.
addheaders
=
[]
self
.
ftpcache
=
ftpcache
# Undocumented feature: you can use a different
# ftp cache by assigning to the .ftpcache member;
# in case you want logically independent URL openers
# Add a header to be used by the HTTP interface only
# e.g. u.addheader('Accept', 'sound/basic')
def
addheader
(
self
,
*
args
):
self
.
addheaders
.
append
(
args
)
# External interface
# Use URLopener().open(file) instead of open(file, 'r')
def
open
(
self
,
url
):
import
string
url
=
string
.
strip
(
url
)
url
=
string
.
strip
(
url
)
if
url
[:
1
]
==
'<'
and
url
[
-
1
:]
==
'>'
:
url
=
string
.
strip
(
url
[
1
:
-
1
])
if
url
[:
1
]
==
'<'
and
url
[
-
1
:]
==
'>'
:
url
=
string
.
strip
(
url
[
1
:
-
1
])
if
url
[:
4
]
==
'URL:'
:
url
=
string
.
strip
(
url
[
4
:])
if
url
[:
4
]
==
'URL:'
:
url
=
string
.
strip
(
url
[
4
:])
type
,
url
=
splittype
(
url
)
type
,
url
=
splittype
(
url
)
if
not
type
:
type
=
'file'
if
not
type
:
type
=
'file'
type
=
regsub
.
gsub
(
'-'
,
'_'
,
type
)
name
=
'open_'
+
type
try
:
if
'-'
in
name
:
func
=
eval
(
'open_'
+
type
)
import
regsub
except
NameError
:
name
=
regsub
.
gsub
(
'-'
,
'_'
,
name
)
if
not
hasattr
(
self
,
name
):
raise
IOError
,
(
'url error'
,
'unknown url type'
,
type
)
raise
IOError
,
(
'url error'
,
'unknown url type'
,
type
)
meth
=
getattr
(
self
,
name
)
try
:
try
:
return
func
(
url
)
return
meth
(
url
)
except
socket
.
error
,
msg
:
except
socket
.
error
,
msg
:
raise
IOError
,
(
'socket error'
,
msg
)
raise
IOError
,
(
'socket error'
,
msg
)
# Each method named open_<type> knows how to open that type of URL
# Each routine of the form open_<type> knows how to open that type of URL
# Use HTTP protocol
def
open_http
(
self
,
url
):
# Use HTTP protocol
def
open_http
(
url
):
import
httplib
import
httplib
host
,
selector
=
splithost
(
url
)
host
,
selector
=
splithost
(
url
)
h
=
httplib
.
HTTP
(
host
)
h
=
httplib
.
HTTP
(
host
)
h
.
putrequest
(
'GET'
,
selector
)
h
.
putrequest
(
'GET'
,
selector
)
for
args
in
self
.
addheaders
:
apply
(
h
.
putheader
,
args
)
errcode
,
errmsg
,
headers
=
h
.
getreply
()
errcode
,
errmsg
,
headers
=
h
.
getreply
()
if
errcode
==
200
:
return
makefile
(
h
.
getfile
(),
headers
)
if
errcode
==
200
:
return
addinfo
(
h
.
getfile
(),
headers
)
else
:
raise
IOError
,
(
'http error'
,
errcode
,
errmsg
,
headers
)
else
:
raise
IOError
,
(
'http error'
,
errcode
,
errmsg
,
headers
)
# Empty rfc822.Message object
# Use Gopher protocol
noheaders
=
rfc822
.
Message
(
open
(
'/dev/null'
,
'r'
))
def
open_gopher
(
self
,
url
):
noheaders
.
fp
.
close
()
# Recycle file descriptor
# Use Gopher protocol
def
open_gopher
(
url
):
import
gopherlib
import
gopherlib
host
,
selector
=
splithost
(
url
)
host
,
selector
=
splithost
(
url
)
type
,
selector
=
splitgophertype
(
selector
)
type
,
selector
=
splitgophertype
(
selector
)
selector
,
query
=
splitquery
(
selector
)
selector
,
query
=
splitquery
(
selector
)
if
query
:
fp
=
gopherlib
.
send_query
(
selector
,
query
,
host
)
if
query
:
fp
=
gopherlib
.
send_query
(
selector
,
query
,
host
)
else
:
fp
=
gopherlib
.
send_selector
(
selector
,
host
)
else
:
fp
=
gopherlib
.
send_selector
(
selector
,
host
)
return
makefile
(
fp
,
noheaders
)
return
addinfo
(
fp
,
noheaders
()
)
# Use local file or FTP depending on form of URL
# Use local file or FTP depending on form of URL
localhost
=
socket
.
gethostbyname
(
'localhost'
)
def
open_file
(
self
,
url
):
thishost
=
socket
.
gethostbyname
(
socket
.
gethostname
())
def
open_file
(
url
):
host
,
file
=
splithost
(
url
)
host
,
file
=
splithost
(
url
)
if
not
host
:
return
makefile
(
open
(
file
,
'r'
),
noheaders
)
if
not
host
:
return
addinfo
(
open
(
file
,
'r'
),
noheaders
()
)
host
,
port
=
splitport
(
host
)
host
,
port
=
splitport
(
host
)
if
not
port
and
socket
.
gethostbyname
(
host
)
in
(
localhost
,
thishost
):
if
not
port
and
socket
.
gethostbyname
(
host
)
in
(
localhost
(),
thishost
()):
try
:
fp
=
open
(
file
,
'r'
)
try
:
fp
=
open
(
file
,
'r'
)
except
IOError
:
fp
=
None
except
IOError
:
fp
=
None
if
fp
:
return
makefile
(
fp
,
noheaders
)
if
fp
:
return
addinfo
(
fp
,
noheaders
()
)
return
open_ftp
(
url
)
return
self
.
open_ftp
(
url
)
# Use FTP protocol
# Use FTP protocol
ftpcache
=
{}
def
open_ftp
(
self
,
url
):
ftperrors
=
(
ftplib
.
error_reply
,
ftplib
.
error_temp
,
ftplib
.
error_perm
,
ftplib
.
error_proto
)
def
open_ftp
(
url
):
host
,
file
=
splithost
(
url
)
host
,
file
=
splithost
(
url
)
host
,
port
=
splitport
(
host
)
host
,
port
=
splitport
(
host
)
host
=
socket
.
gethostbyname
(
host
)
host
=
socket
.
gethostbyname
(
host
)
if
not
port
:
port
=
ftplib
.
FTP_PORT
if
not
port
:
import
ftplib
port
=
ftplib
.
FTP_PORT
key
=
(
host
,
port
)
key
=
(
host
,
port
)
try
:
try
:
if
not
ftpcache
.
has_key
(
key
):
if
not
self
.
ftpcache
.
has_key
(
key
):
ftpcache
[
key
]
=
ftpwrapper
(
host
,
port
)
self
.
ftpcache
[
key
]
=
ftpwrapper
(
host
,
port
)
return
makefile
(
ftpcache
[
key
].
retrfile
(
file
),
noheaders
)
return
addinfo
(
self
.
ftpcache
[
key
].
retrfile
(
file
),
except
ftperrors
,
msg
:
noheaders
())
except
ftperrors
(),
msg
:
raise
IOError
,
(
'ftp error'
,
msg
)
raise
IOError
,
(
'ftp error'
,
msg
)
# Utility
classe
s
# Utility
function
s
# Class used to add an info() method to a file object
# Return the IP address of the magic hostname 'localhost'
class
makefile
:
_localhost
=
None
def
__init__
(
self
,
fp
,
headers
):
def
localhost
():
self
.
fp
=
fp
global
_localhost
self
.
headers
=
headers
if
not
_localhost
:
self
.
read
=
self
.
fp
.
read
_localhost
=
socket
.
gethostbyname
(
'localhost'
)
self
.
fileno
=
self
.
fp
.
fileno
return
_localhost
self
.
close
=
self
.
fp
.
close
def
info
(
self
):
# Return the IP address of the current host
return
self
.
headers
_thishost
=
None
def
thishost
():
global
_thishost
if
not
_thishost
:
_thishost
=
socket
.
gethostbyname
(
socket
.
gethostname
())
return
_thishost
# Return the set of errors raised by the FTP class
_ftperrors
=
None
def
ftperrors
():
global
_ftperrors
if
not
_ftperrors
:
import
ftplib
_ftperrors
=
(
ftplib
.
error_reply
,
ftplib
.
error_temp
,
ftplib
.
error_perm
,
ftplib
.
error_proto
)
return
_ftperrors
# Return an empty rfc822.Message object
_noheaders
=
None
def
noheaders
():
global
_noheaders
if
not
_noheaders
:
import
rfc822
_noheaders
=
rfc822
.
Message
(
open
(
'/dev/null'
,
'r'
))
_noheaders
.
fp
.
close
()
# Recycle file descriptor
return
_noheaders
# Utility classes
# Class used by open_ftp() for cache of open FTP connections
# Class used by open_ftp() for cache of open FTP connections
class
ftpwrapper
:
class
ftpwrapper
:
...
@@ -118,10 +178,12 @@ class ftpwrapper:
...
@@ -118,10 +178,12 @@ class ftpwrapper:
self
.
port
=
port
self
.
port
=
port
self
.
init
()
self
.
init
()
def
init
(
self
):
def
init
(
self
):
import
ftplib
self
.
ftp
=
ftplib
.
FTP
()
self
.
ftp
=
ftplib
.
FTP
()
self
.
ftp
.
connect
(
self
.
host
,
self
.
port
)
self
.
ftp
.
connect
(
self
.
host
,
self
.
port
)
self
.
ftp
.
login
()
self
.
ftp
.
login
()
def
retrfile
(
self
,
file
):
def
retrfile
(
self
,
file
):
import
ftplib
try
:
try
:
self
.
ftp
.
voidcmd
(
'TYPE I'
)
self
.
ftp
.
voidcmd
(
'TYPE I'
)
except
ftplib
.
all_errors
:
except
ftplib
.
all_errors
:
...
@@ -140,27 +202,43 @@ class ftpwrapper:
...
@@ -140,27 +202,43 @@ class ftpwrapper:
if
file
:
cmd
=
'NLST '
+
file
if
file
:
cmd
=
'NLST '
+
file
else
:
cmd
=
'NLST'
else
:
cmd
=
'NLST'
conn
=
self
.
ftp
.
transfercmd
(
cmd
)
conn
=
self
.
ftp
.
transfercmd
(
cmd
)
return
fakefile
(
self
.
ftp
,
conn
)
return
addclosehook
(
conn
.
makefile
(
'r'
),
self
.
ftp
.
voidresp
)
# Class used by ftpwrapper to handle response when transfer is complete
# Base class for addinfo and addclosehook
class
fakefile
:
class
addbase
:
def
__init__
(
self
,
ftp
,
conn
):
def
__init__
(
self
,
fp
):
self
.
ftp
=
ftp
self
.
fp
=
fp
self
.
conn
=
conn
self
.
fp
=
self
.
conn
.
makefile
(
'r'
)
self
.
read
=
self
.
fp
.
read
self
.
read
=
self
.
fp
.
read
self
.
readline
=
self
.
fp
.
readline
self
.
readlines
=
self
.
fp
.
readlines
self
.
fileno
=
self
.
fp
.
fileno
self
.
fileno
=
self
.
fp
.
fileno
def
__del__
(
self
):
def
__del__
(
self
):
self
.
close
()
self
.
close
()
def
close
(
self
):
def
close
(
self
):
self
.
conn
=
None
self
.
fp
=
None
self
.
fp
=
None
self
.
read
=
None
if
self
.
ftp
:
self
.
ftp
.
voidresp
()
# Class to add a close hook to an open file
self
.
ftp
=
None
class
addclosehook
(
addbase
):
def
__init__
(
self
,
fp
,
closehook
,
*
hookargs
):
addbase
.
__init__
(
self
,
fp
)
self
.
closehook
=
closehook
self
.
hookargs
=
hookargs
def
close
(
self
):
if
self
.
closehook
:
apply
(
self
.
closehook
,
self
.
hookargs
)
self
.
closehook
=
None
self
.
fp
=
None
# class to add an info() method to an open file
class
addinfo
(
addbase
):
def
__init__
(
self
,
fp
,
headers
):
addbase
.
__init__
(
self
,
fp
)
self
.
headers
=
headers
def
info
(
self
):
return
self
.
headers
# Utilities to
split url parts into component
s:
# Utilities to
parse URL
s:
# splittype('type:opaquestring') --> 'type', 'opaquestring'
# splittype('type:opaquestring') --> 'type', 'opaquestring'
# splithost('//host[:port]/path') --> 'host[:port]', '/path'
# splithost('//host[:port]/path') --> 'host[:port]', '/path'
# splitport('host:port') --> 'host', 'port'
# splitport('host:port') --> 'host', 'port'
...
@@ -168,29 +246,29 @@ class fakefile:
...
@@ -168,29 +246,29 @@ class fakefile:
# splittag('/path#tag') --> '/path', 'tag'
# splittag('/path#tag') --> '/path', 'tag'
# splitgophertype('/Xselector') --> 'X', 'selector'
# splitgophertype('/Xselector') --> 'X', 'selector'
typeprog
=
regex
.
compile
(
'^
\
([^/:]+
\
):
\
(.*
\
)$'
)
_
typeprog
=
regex
.
compile
(
'^
\
([^/:]+
\
):
\
(.*
\
)$'
)
def
splittype
(
url
):
def
splittype
(
url
):
if
typeprog
.
match
(
url
)
>=
0
:
return
typeprog
.
group
(
1
,
2
)
if
_typeprog
.
match
(
url
)
>=
0
:
return
_
typeprog
.
group
(
1
,
2
)
return
None
,
url
return
None
,
url
hostprog
=
regex
.
compile
(
'^//
\
([^/]+
\
)
\
(.*
\
)$'
)
_
hostprog
=
regex
.
compile
(
'^//
\
([^/]+
\
)
\
(.*
\
)$'
)
def
splithost
(
url
):
def
splithost
(
url
):
if
hostprog
.
match
(
url
)
>=
0
:
return
hostprog
.
group
(
1
,
2
)
if
_hostprog
.
match
(
url
)
>=
0
:
return
_
hostprog
.
group
(
1
,
2
)
return
None
,
url
return
None
,
url
portprog
=
regex
.
compile
(
'^
\
(.*
\
):
\
([
0
-9]+
\
)$
'
)
_
portprog
=
regex
.
compile
(
'^
\
(.*
\
):
\
([
0
-9]+
\
)$
'
)
def splitport(host):
def splitport(host):
if
portprog.match(host) >= 0: return
portprog.group(1, 2)
if
_portprog.match(host) >= 0: return _
portprog.group(1, 2)
return host, None
return host, None
queryprog = regex.compile('
^
\
(.
*
\
)
\
?
\
([
^
?
]
*
\
)
$
')
_
queryprog = regex.compile('
^
\
(.
*
\
)
\
?
\
([
^
?
]
*
\
)
$
')
def splitquery(url):
def splitquery(url):
if
queryprog.match(url) >= 0: return
queryprog.group(1, 2)
if
_queryprog.match(url) >= 0: return _
queryprog.group(1, 2)
return url, None
return url, None
tagprog = regex.compile('
^
\
(.
*
\
)
#\([^#]*\)$')
_
tagprog = regex.compile('
^
\
(.
*
\
)
#\([^#]*\)$')
def
splittag
(
url
):
def
splittag
(
url
):
if
tagprog
.
match
(
url
)
>=
0
:
return
tagprog
.
group
(
1
,
2
)
if
_tagprog
.
match
(
url
)
>=
0
:
return
_
tagprog
.
group
(
1
,
2
)
return
url
,
None
return
url
,
None
def
splitgophertype
(
selector
):
def
splitgophertype
(
selector
):
...
@@ -202,6 +280,7 @@ def splitgophertype(selector):
...
@@ -202,6 +280,7 @@ def splitgophertype(selector):
# Test program
# Test program
def
test
():
def
test
():
import
sys
import
sys
import
regsub
args
=
sys
.
argv
[
1
:]
args
=
sys
.
argv
[
1
:]
if
not
args
:
if
not
args
:
args
=
[
args
=
[
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment