Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
d376fceb
Commit
d376fceb
authored
Feb 21, 1994
by
Guido van Rossum
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Towards a standard access mechanism for URLs.
parent
3d99ca69
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
546 additions
and
0 deletions
+546
-0
Lib/gopherlib.py
Lib/gopherlib.py
+195
-0
Lib/httplib.py
Lib/httplib.py
+129
-0
Lib/urlopen.py
Lib/urlopen.py
+222
-0
No files found.
Lib/gopherlib.py
0 → 100644
View file @
d376fceb
# Gopher protocol client interface
import
string
# Default selector, host and port
DEF_SELECTOR = '1/'
DEF_HOST     = 'gopher.micro.umn.edu'
DEF_PORT     = 70

# Recognized file types
# Each A_* name holds the one-character type code for that kind of item.
# type_to_name() scans the module namespace for names starting with 'A_',
# so every constant added here is picked up by it automatically.
A_TEXT       = '0'
A_MENU       = '1'
A_CSO        = '2'
A_ERROR      = '3'
A_MACBINHEX  = '4'
A_PCBINHEX   = '5'
A_UUENCODED  = '6'
A_INDEX      = '7'
A_TELNET     = '8'
A_BINARY     = '9'
A_DUPLICATE  = '+'
A_SOUND      = 's'
A_EVENT      = 'e'
A_CALENDAR   = 'c'
A_HTML       = 'h'      # HTML file
A_TN3270     = 'T'
A_MIME       = 'M'
A_IMAGE      = 'I'
A_WHOIS      = 'w'
A_QUERY      = 'q'
A_GIF        = 'g'
A_WWW        = 'w'      # WWW address (same code as A_WHOIS)
A_PLUS_IMAGE = ':'
A_PLUS_MOVIE = ';'
A_PLUS_SOUND = '<'
# Function mapping all file types to strings; unknown types become TYPE='x'
# _names is a snapshot of the names defined in the module so far.
_names = dir()
# Cache mapping type code -> name without the 'A_' prefix; filled in by
# the first call to type_to_name().
_type_to_name_map = None
def type_to_name(gtype):
    # Return the symbolic name for the type code gtype, or the string
    # 'TYPE=' followed by the repr of gtype when no A_* constant has
    # that value.
    global _type_to_name_map
    if not _type_to_name_map:
        # The cache starts out as None, so create the dictionary before
        # storing into it.
        _type_to_name_map = {}
        for name in _names:
            if name[:2] == 'A_':
                # eval() only ever sees names taken from dir() above
                _type_to_name_map[eval(name)] = name[2:]
    try:
        return _type_to_name_map[gtype]
    except KeyError:
        return 'TYPE=' + repr(gtype)
# Names for characters and strings
# CRLF is appended to every selector sent and stripped from every line
# read back; TAB separates the fields of a directory line.
CRLF = '\r\n'
TAB = '\t'
# Send a selector to a given host and port, return a file with the reply
def
send_selector
(
selector
,
host
,
*
args
):
import
socket
import
string
if
args
:
if
args
[
1
:]:
raise
TypeError
,
'too many args'
port
=
args
[
0
]
else
:
port
=
None
i
=
string
.
find
(
host
,
':'
)
if
i
>=
0
:
host
,
port
=
host
[:
i
],
string
.
atoi
(
host
[
i
+
1
:])
if
not
port
:
port
=
DEF_PORT
elif
type
(
port
)
==
type
(
''
):
port
=
string
.
atoi
(
port
)
s
=
socket
.
socket
(
socket
.
AF_INET
,
socket
.
SOCK_STREAM
)
s
.
connect
(
host
,
port
)
s
.
send
(
selector
+
CRLF
)
s
.
shutdown
(
1
)
return
s
.
makefile
(
'r'
)
# Send a selector and a query string
# The query is joined to the selector with a tab; any extra arguments are
# passed through to send_selector() unchanged.
def send_query(selector, query, host, *args):
    request = selector + '\t' + query
    return apply(send_selector, (request, host) + args)
# The following functions interpret the data returned by the gopher
# server according to the expected type, e.g. textfile or directory
# Get a directory in the form of a list of entries
def
get_directory
(
f
):
import
string
list
=
[]
while
1
:
line
=
f
.
readline
()
if
not
line
:
print
'(Unexpected EOF from server)'
break
if
line
[
-
2
:]
==
CRLF
:
line
=
line
[:
-
2
]
elif
line
[
-
1
:]
in
CRLF
:
line
=
line
[:
-
1
]
if
line
==
'.'
:
break
if
not
line
:
print
'(Empty line from server)'
continue
gtype
=
line
[
0
]
parts
=
string
.
splitfields
(
line
[
1
:],
TAB
)
if
len
(
parts
)
<
4
:
print
'(Bad line from server:'
,
`line`
,
')'
continue
if
len
(
parts
)
>
4
:
if
parts
[
4
:]
!=
[
'+'
]:
print
'(Extra info from server:'
,
parts
[
4
:],
')'
else
:
parts
.
append
(
''
)
parts
.
insert
(
0
,
gtype
)
list
.
append
(
parts
)
return
list
# Get a text file as a list of lines, with trailing CRLF stripped
# Collects the lines that get_alt_textfile() hands out.
def get_textfile(f):
    lines = []
    get_alt_textfile(f, lines.append)
    return lines
# Get a text file and pass each line to a function, with trailing CRLF stripped
def
get_alt_textfile
(
f
,
func
):
while
1
:
line
=
f
.
readline
()
if
not
line
:
print
'(Unexpected EOF from server)'
break
if
line
[
-
2
:]
==
CRLF
:
line
=
line
[:
-
2
]
elif
line
[
-
1
:]
in
CRLF
:
line
=
line
[:
-
1
]
if
line
==
'.'
:
break
if
line
[:
2
]
==
'..'
:
line
=
line
[
1
:]
func
(
line
)
# Get a binary file as one solid data block
# Reads f until end of file and returns everything read.
def get_binary(f):
    return f.read()
# Get a binary file and pass each block to a function
# Blocks of at most blocksize are read from f and handed to func until a
# read returns nothing.
def get_alt_binary(f, func, blocksize):
    block = f.read(blocksize)
    while block:
        func(block)
        block = f.read(blocksize)
# Trivial test program
def
test
():
import
sys
import
getopt
opts
,
args
=
getopt
.
getopt
(
sys
.
argv
[
1
:],
''
)
selector
=
DEF_SELECTOR
type
=
selector
[
0
]
host
=
DEF_HOST
port
=
DEF_PORT
if
args
:
host
=
args
[
0
]
args
=
args
[
1
:]
if
args
:
type
=
args
[
0
]
args
=
args
[
1
:]
if
len
(
type
)
>
1
:
type
,
selector
=
type
[
0
],
type
else
:
selector
=
''
if
args
:
selector
=
args
[
0
]
args
=
args
[
1
:]
query
=
''
if
args
:
query
=
args
[
0
]
args
=
args
[
1
:]
if
type
==
A_INDEX
:
f
=
send_query
(
selector
,
query
,
host
)
else
:
f
=
send_selector
(
selector
,
host
)
if
type
==
A_TEXT
:
list
=
get_textfile
(
f
)
for
item
in
list
:
print
item
elif
type
in
(
A_MENU
,
A_INDEX
):
list
=
get_directory
(
f
)
for
item
in
list
:
print
item
else
:
data
=
get_binary
(
f
)
print
'binary data:'
,
len
(
data
),
'bytes:'
,
`data[:100]`
[:
40
]
# Run the test when run as script
if
__name__
==
'__main__'
:
test
()
Lib/httplib.py
0 → 100644
View file @
d376fceb
# HTTP client class
#
# See the following document for a tentative protocol description:
# Hypertext Transfer Protocol (HTTP) Tim Berners-Lee, CERN
# Internet Draft 5 Nov 1993
# draft-ietf-iiir-http-00.txt Expires 5 May 1994
#
# Example:
#
# >>> from httplib import HTTP
# >>> h = HTTP('www.cwi.nl')
# >>> h.putrequest('GET', '/index.html')
# >>> h.putheader('Accept', 'text/html')
# >>> h.putheader('Accept', 'text/plain')
# >>> errcode, errmsg, headers = h.getreply()
# >>> if errcode == 200:
# ... f = h.getfile()
# ... print f.read() # Print the raw HTML
# ...
# <TITLE>Home Page of CWI, Amsterdam</TITLE>
# [...many more lines...]
# >>>
#
# Note that an HTTP object is used for a single request -- to issue a
# second request to the same server, you create a new HTTP object.
# (This is in accordance with the protocol, which uses a new TCP
# connection for each request.)
import
os
import
socket
import
string
import
regex
import
regsub
import
rfc822
# Protocol version placed on every request line, and the default TCP port
HTTP_VERSION = 'HTTP/1.0'
HTTP_PORT = 80

# Pattern for the first line of a reply: the version string (with its
# dots escaped by the gsub call), then a three-digit code as group 1 and
# the remainder of the line as group 2.
# NOTE(review): the character class after the version is taken to be
# space-or-tab -- confirm against the upstream file.
replypat = regsub.gsub('\\.', '\\\\.', HTTP_VERSION) + \
          '[ \t]+\([0-9][0-9][0-9]\)\(.*\)'
replyprog = regex.compile(replypat)
class HTTP:
def __init__(self, *args):
self.debuglevel = 0
if args: apply(self.connect, args)
def set_debuglevel(self, debuglevel):
self.debuglevel = debuglevel
def connect(self, host, *args):
if args:
if args[1:]: raise TypeError, '
too
many
args
'
port = args[0]
else:
i = string.find(host, '
:
')
if i >= 0:
host, port = host[:i], host[i+1:]
try: port = string.atoi(port)
except string.atoi_error: port = None
if not port: port = HTTP_PORT
self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
if self.debuglevel > 0: print '
connect
:
', (host, port)
self.sock.connect(host, port)
def send(self, str):
if self.debuglevel > 0: print '
send
:
', `str`
self.sock.send(str)
def putrequest(self, request, selector):
str = '
%
s
%
s
%
s
\
r
\
n
' % (request, selector, HTTP_VERSION)
self.send(str)
def putheader(self, header, *args):
str = '
%
s
:
%
s
\
r
\
n
' % (header, string.joinfields(args,'
\
r
\
n
\
t
'))
self.send(str)
def endheaders(self):
self.send('
\
r
\
n
')
def endrequest(self):
if self.debuglevel > 0: print '
shutdown
:
1
'
self.sock.shutdown(1)
def getreply(self):
self.endrequest()
self.file = self.sock.makefile('r')
line = self.file.readline()
if self.debuglevel > 0: print '
reply
:
', `line`
if replyprog.match(line) < 0:
self.headers = None
return -1, line, self.headers
errcode, errmsg = replyprog.group(1, 2)
errcode = string.atoi(errcode)
errmsg = string.strip(errmsg)
self.headers = rfc822.Message(self.file)
return errcode, errmsg, self.headers
def getfile(self):
return self.file
def test():
import sys
import getopt
opts, args = getopt.getopt(sys.argv[1:], '
d
')
dl = 0
for o, a in opts:
if o == '
-
d
': dl = dl + 1
host = '
www
.
cwi
.
nl
:
80
'
selector = '
/
index
.
html
'
if args[0:]: host = args[0]
if args[1:]: selector = args[1]
h = HTTP()
h.set_debuglevel(dl)
h.connect(host)
h.putrequest('
GET
', selector)
errcode, errmsg, headers = h.getreply()
print '
errcode
=
', errcode
print '
headers
=
', headers
print '
errmsg
=
', errmsg
if headers:
for header in headers.headers: print string.strip(header)
print h.getfile().read()
if __name__ == '
__main__
':
test()
Lib/urlopen.py
0 → 100755
View file @
d376fceb
# Open an arbitrary URL
#
# See the following document for a tentative description of URLs:
# Uniform Resource Locators Tim Berners-Lee
# INTERNET DRAFT CERN
# IETF URL Working Group 14 July 1993
# draft-ietf-uri-url-01.txt
#
# The object returned by urlopen() will differ per protocol.
# All you know is that it has methods read(), fileno(), close() and info().
# The read(), fileno() and close() methods work like those of open files.
# The info() method returns an rfc822.Message object which can be
# used to query various info about the object, if available.
# (rfc822.Message objects are queried with the getheader() method.)
import
socket
import
regex
import
regsub
import
string
import
rfc822
import
ftplib
# External interface -- use urlopen(file) as if it were open(file, 'r')
def
urlopen
(
url
):
url
=
string
.
strip
(
url
)
if
url
[:
1
]
==
'<'
and
url
[
-
1
:]
==
'>'
:
url
=
string
.
strip
(
url
[
1
:
-
1
])
if
url
[:
4
]
==
'URL:'
:
url
=
string
.
strip
(
url
[
4
:])
type
,
url
=
splittype
(
url
)
if
not
type
:
type
=
'file'
type
=
regsub
.
gsub
(
'-'
,
'_'
,
type
)
try
:
func
=
eval
(
'open_'
+
type
)
except
NameError
:
raise
IOError
,
(
'url error'
,
'unknown url type'
,
type
)
try
:
return
func
(
url
)
except
socket
.
error
,
msg
:
raise
IOError
,
(
'socket error'
,
msg
)
# Each routine of the form open_<type> knows how to open that type of URL
# Use HTTP protocol
def
open_http
(
url
):
import
httplib
host
,
selector
=
splithost
(
url
)
h
=
httplib
.
HTTP
(
host
)
h
.
putrequest
(
'GET'
,
selector
)
errcode
,
errmsg
,
headers
=
h
.
getreply
()
if
errcode
==
200
:
return
makefile
(
h
.
getfile
(),
headers
)
else
:
raise
IOError
,
(
'http error'
,
errcode
,
errmsg
,
headers
)
# Empty rfc822.Message object
# Built by parsing /dev/null; passed as the headers by open_gopher(),
# open_file() and open_ftp().
noheaders = rfc822.Message(open('/dev/null', 'r'))
noheaders.fp.close()    # Recycle file descriptor
# Use Gopher protocol
# The leading type character is split off the selector and not used
# further; a '?query' part turns the request into a query.
def open_gopher(url):
    import gopherlib
    host, selector = splithost(url)
    gtype, selector = splitgophertype(selector)
    selector, query = splitquery(selector)
    if not query:
        reply = gopherlib.send_selector(selector, host)
    else:
        reply = gopherlib.send_query(selector, query, host)
    return makefile(reply, noheaders)
# Use local file or FTP depending on form of URL
# Addresses that count as "this machine", looked up once at import time
localhost = socket.gethostbyname('localhost')
thishost = socket.gethostbyname(socket.gethostname())
# Without a host part the file is opened directly.  With a host that has
# no port and resolves to one of the addresses above, a local open is
# tried first; in every other case the URL is handed to open_ftp().
def open_file(url):
    host, path = splithost(url)
    if not host:
        return makefile(open(path, 'r'), noheaders)
    host, port = splitport(host)
    if port or socket.gethostbyname(host) not in (localhost, thishost):
        return open_ftp(url)
    try:
        fp = open(path, 'r')
    except IOError:
        fp = None
    if not fp:
        return open_ftp(url)
    return makefile(fp, noheaders)
# Use FTP protocol
ftpcache
=
{}
ftperrors
=
(
ftplib
.
error_reply
,
ftplib
.
error_temp
,
ftplib
.
error_perm
,
ftplib
.
error_proto
)
def
open_ftp
(
url
):
host
,
file
=
splithost
(
url
)
host
,
port
=
splitport
(
host
)
host
=
socket
.
gethostbyname
(
host
)
if
not
port
:
port
=
ftplib
.
FTP_PORT
key
=
(
host
,
port
)
try
:
if
not
ftpcache
.
has_key
(
key
):
ftpcache
[
key
]
=
ftpwrapper
(
host
,
port
)
return
makefile
(
ftpcache
[
key
].
retrfile
(
file
),
noheaders
)
except
ftperrors
,
msg
:
raise
IOError
,
(
'ftp error'
,
msg
)
# Utility classes
# Class used to add an info() method to a file object
# read, fileno and close are taken straight from the wrapped file, so
# calls on the wrapper go directly to it.
class makefile:

    def __init__(self, fp, headers):
        self.fp = fp
        self.headers = headers
        for name in ('read', 'fileno', 'close'):
            setattr(self, name, getattr(fp, name))

    # Return the headers object given to the constructor
    def info(self):
        return self.headers
# Class used by open_ftp() for cache of open FTP connections
class
ftpwrapper
:
def
__init__
(
self
,
host
,
port
):
self
.
host
=
host
self
.
port
=
port
self
.
init
()
def
init
(
self
):
self
.
ftp
=
ftplib
.
FTP
()
self
.
ftp
.
connect
(
self
.
host
,
self
.
port
)
self
.
ftp
.
login
()
def
retrfile
(
self
,
file
):
try
:
self
.
ftp
.
voidcmd
(
'TYPE I'
)
except
ftplib
.
all_errors
:
self
.
init
()
self
.
ftp
.
voidcmd
(
'TYPE I'
)
conn
=
None
if
file
:
try
:
cmd
=
'RETR '
+
file
conn
=
self
.
ftp
.
transfercmd
(
cmd
)
except
ftplib
.
error_perm
,
reason
:
if
reason
[:
3
]
!=
'550'
:
raise
IOError
,
(
'ftp error'
,
reason
)
if
not
conn
:
# Try a directory listing
if
file
:
cmd
=
'NLST '
+
file
else
:
cmd
=
'NLST'
conn
=
self
.
ftp
.
transfercmd
(
cmd
)
return
fakefile
(
self
.
ftp
,
conn
)
# Class used by ftpwrapper to handle response when transfer is complete
class fakefile:

    def __init__(self, ftp, conn):
        self.ftp, self.conn = ftp, conn
        stream = conn.makefile('r')
        self.fp = stream
        self.read = stream.read
        self.fileno = stream.fileno

    def __del__(self):
        self.close()

    # Drop the data connection, then collect the reply that is pending on
    # the control connection.  The ftp reference is cleared afterwards,
    # so the reply is read at most once.
    def close(self):
        self.conn = self.fp = self.read = None
        if self.ftp:
            self.ftp.voidresp()
        self.ftp = None
# Utilities to split url parts into components:
# splittype('type:opaquestring') --> 'type', 'opaquestring'
# splithost('//host[:port]/path') --> 'host[:port]', '/path'
# splitport('host:port') --> 'host', 'port'
# splitquery('/path?query') --> '/path', 'query'
# splittag('/path#tag') --> '/path', 'tag'
# splitgophertype('/Xselector') --> 'X', 'selector'
# Pattern: a type made of characters other than '/' and ':', a colon,
# then the rest of the URL
typeprog = regex.compile('^\([^/:]+\):\(.*\)$')
# Returns (type, rest), or (None, url) when the URL has no type prefix
def splittype(url):
    if typeprog.match(url) < 0:
        return None, url
    return typeprog.group(1, 2)
# Pattern: two slashes, a host part free of slashes, then the rest
hostprog = regex.compile('^//\([^/]+\)\(.*\)$')
# Returns (host, rest), or (None, url) when the URL has no '//host' part
def splithost(url):
    if hostprog.match(url) < 0:
        return None, url
    return hostprog.group(1, 2)
# Pattern: anything, a colon, then a run of digits up to the end
portprog = regex.compile('^\(.*\):\([0-9]+\)$')
# Returns (host, port) with the port still a string of digits, or
# (host, None) when there is no numeric ':port' suffix
def splitport(host):
    if portprog.match(host) < 0:
        return host, None
    return portprog.group(1, 2)
# Pattern: a path, a question mark, then a query holding no further
# question marks
queryprog = regex.compile('^\(.*\)\?\([^?]*\)$')
# Returns (path, query), or (url, None) when there is no '?query' part
def splitquery(url):
    if queryprog.match(url) < 0:
        return url, None
    return queryprog.group(1, 2)
# Pattern: a path, a '#', then a tag holding no further '#'
tagprog = regex.compile('^\(.*\)#\([^#]*\)$')
# Returns (path, tag), or (url, None) when there is no '#tag' part
def splittag(url):
    if tagprog.match(url) < 0:
        return url, None
    return tagprog.group(1, 2)
# Split the type character off a selector of the form '/Xselector'.
# Returns (None, selector) unless the selector starts with '/' and has at
# least one more character after it.
def splitgophertype(selector):
    if selector[:1] != '/' or not selector[1:2]:
        return None, selector
    return selector[1], selector[2:]
# Test program
def
test
():
import
sys
args
=
sys
.
argv
[
1
:]
if
not
args
:
args
=
[
'/etc/passwd'
,
'file:/etc/passwd'
,
'file://localhost/etc/passwd'
,
'ftp://ftp.cwi.nl/etc/passwd'
,
'gopher://gopher.cwi.nl/11/'
,
'http://www.cwi.nl/index.html'
,
]
for
arg
in
args
:
print
'-'
*
10
,
arg
,
'-'
*
10
print
regsub
.
gsub
(
'
\
r
'
,
''
,
urlopen
(
arg
).
read
())
print
'-'
*
40
# Run test program when run as a script
if
__name__
==
'__main__'
:
test
()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment