Commit fad81f08 authored by Guido van Rossum

Be explicit about scheme_chars -- string.letters is locale-dependent, so we can't use it.

While I'm at it, got rid of string module use.  (Found several new
hard special cases for a hypothetical conversion tool: from string
import join, find, rfind; and a local assignment "find=string.find".)
parent 68abe832
......@@ -4,10 +4,6 @@ See RFC 1808: "Relative Uniform Resource Locators", by R. Fielding,
UC Irvine, June 1995.
"""
# Standard/builtin Python modules
import string
from string import join, split, rfind
# A classification of schemes ('' means apply by default)
uses_relative = ['ftp', 'http', 'gopher', 'nntp', 'wais', 'file',
'https', 'shttp',
......@@ -31,7 +27,10 @@ uses_fragment = ['ftp', 'hdl', 'http', 'gopher', 'news', 'nntp', 'wais',
'file', 'prospero', '']
# Characters valid in scheme names
# NOTE(review): this text is a diff view whose +/- markers were lost; the
# first assignment below appears to be the removed pre-change line and the
# explicit literal after it the replacement -- confirm against the commit.
# The set is spelled out (ASCII letters, digits, '+', '-', '.') because
# string.letters is locale dependent.
scheme_chars = string.letters + string.digits + '+-.'
scheme_chars = ('abcdefghijklmnopqrstuvwxyz'
'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
'0123456789'
'+-.')
MAX_CACHE_SIZE = 20
_parse_cache = {}
......@@ -54,29 +53,28 @@ def urlparse(url, scheme = '', allow_fragments = 1):
return cached
if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth
clear_cache()
find = string.find
netloc = path = params = query = fragment = ''
i = find(url, ':')
i = url.find(':')
if i > 0:
if url[:i] == 'http': # optimize the common case
scheme = string.lower(url[:i])
scheme = url[:i].lower()
url = url[i+1:]
if url[:2] == '//':
i = find(url, '/', 2)
i = url.find('/', 2)
if i < 0:
i = len(url)
netloc = url[2:i]
url = url[i:]
if allow_fragments:
i = string.rfind(url, '#')
i = url.rfind('#')
if i >= 0:
fragment = url[i+1:]
url = url[:i]
i = find(url, '?')
i = url.find('?')
if i >= 0:
query = url[i+1:]
url = url[:i]
i = find(url, ';')
i = url.find(';')
if i >= 0:
params = url[i+1:]
url = url[:i]
......@@ -87,23 +85,23 @@ def urlparse(url, scheme = '', allow_fragments = 1):
if c not in scheme_chars:
break
else:
scheme, url = string.lower(url[:i]), url[i+1:]
scheme, url = url[:i].lower(), url[i+1:]
if scheme in uses_netloc:
if url[:2] == '//':
i = find(url, '/', 2)
i = url.find('/', 2)
if i < 0:
i = len(url)
netloc, url = url[2:i], url[i:]
if allow_fragments and scheme in uses_fragment:
i = string.rfind(url, '#')
i = url.rfind('#')
if i >= 0:
url, fragment = url[:i], url[i+1:]
if scheme in uses_query:
i = find(url, '?')
i = url.find('?')
if i >= 0:
url, query = url[:i], url[i+1:]
if scheme in uses_params:
i = find(url, ';')
i = url.find(';')
if i >= 0:
url, params = url[:i], url[i+1:]
tuple = scheme, netloc, url, params, query, fragment
......@@ -151,7 +149,7 @@ def urljoin(base, url, allow_fragments = 1):
if not path:
return urlunparse((scheme, netloc, bpath,
params, query or bquery, fragment))
segments = split(bpath, '/')[:-1] + split(path, '/')
segments = bpath.split('/')[:-1] + path.split('/')
# XXX The stuff below is bogus in various ways...
if segments[-1] == '.':
segments[-1] = ''
......@@ -171,7 +169,7 @@ def urljoin(base, url, allow_fragments = 1):
segments[-1] = ''
elif len(segments) >= 2 and segments[-1] == '..':
segments[-2:] = ['']
return urlunparse((scheme, netloc, join(segments, '/'),
return urlunparse((scheme, netloc, '/'.join(segments),
params, query, fragment))
def urldefrag(url):
......@@ -236,7 +234,7 @@ def test():
while 1:
line = fp.readline()
if not line: break
words = string.split(line)
words = line.split()
if not words:
continue
url = words[0]
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment