Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
f39882d8
Commit
f39882d8
authored
May 28, 1996
by
Guido van Rossum
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
optimizations due to Fred Drake; added urldefrag() function
parent
fe11675d
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
35 additions
and
18 deletions
+35
-18
Lib/urlparse.py
Lib/urlparse.py
+35
-18
No files found.
Lib/urlparse.py
View file @
f39882d8
...
...
@@ -3,6 +3,7 @@
# Standard/builtin Python modules
import
string
from
string
import
joinfields
,
splitfields
,
find
,
rfind
# A classification of schemes ('' means apply by default)
uses_relative
=
[
'ftp'
,
'http'
,
'gopher'
,
'nntp'
,
'wais'
,
'file'
,
...
...
@@ -18,17 +19,23 @@ uses_fragment = ['ftp', 'hdl', 'http', 'gopher', 'news', 'nntp', 'wais',
# Characters valid in scheme names
scheme_chars
=
string
.
letters
+
string
.
digits
+
'+-.'
_parse_cache
=
{}
def
clear_cache
():
global
_parse_cache
_parse_cache
=
{}
# Parse a URL into 6 components:
# <scheme>://<netloc>/<path>;<params>?<query>#<fragment>
# Return a 6-tuple: (scheme, netloc, path, params, query, fragment).
# Note that we don't break the components up in smaller bits
# (e.g. netloc is a single string) and we don't expand % escapes.
def
urlparse
(
url
,
scheme
=
''
,
allow_framents
=
1
):
netloc
=
''
path
=
''
params
=
''
query
=
''
fragment
=
''
key
=
url
,
scheme
,
allow_framents
if
_parse_cache
.
has_key
(
key
):
return
_parse_cache
[
key
]
netloc
=
path
=
params
=
query
=
fragment
=
''
i
=
string
.
find
(
url
,
':'
)
if
i
>
0
:
for
c
in
url
[:
i
]:
...
...
@@ -54,7 +61,9 @@ def urlparse(url, scheme = '', allow_framents = 1):
i
=
string
.
find
(
url
,
';'
)
if
i
>=
0
:
url
,
params
=
url
[:
i
],
url
[
i
+
1
:]
return
scheme
,
netloc
,
url
,
params
,
query
,
fragment
tuple
=
scheme
,
netloc
,
url
,
params
,
query
,
fragment
_parse_cache
[
key
]
=
tuple
return
tuple
# Put a parsed URL back together again. This may result in a slightly
# different, but equivalent URL, if the URL that was parsed originally
...
...
@@ -80,7 +89,7 @@ def urljoin(base, url, allow_framents = 1):
if
not
base
:
return
url
bscheme
,
bnetloc
,
bpath
,
bparams
,
bquery
,
bfragment
=
\
urlparse
(
base
,
''
,
allow_framents
)
urlparse
(
base
,
''
,
allow_framents
)
scheme
,
netloc
,
path
,
params
,
query
,
fragment
=
\
urlparse
(
url
,
bscheme
,
allow_framents
)
# XXX Unofficial hack: default netloc to bnetloc even if
...
...
@@ -90,9 +99,9 @@ def urljoin(base, url, allow_framents = 1):
scheme
in
uses_netloc
and
bscheme
in
uses_netloc
:
netloc
=
bnetloc
# Strip the port number
i
=
string
.
find
(
netloc
,
'@'
)
i
=
find
(
netloc
,
'@'
)
if
i
<
0
:
i
=
0
i
=
string
.
find
(
netloc
,
':'
,
i
)
i
=
find
(
netloc
,
':'
,
i
)
if
i
>=
0
:
netloc
=
netloc
[:
i
]
if
scheme
!=
bscheme
or
scheme
not
in
uses_relative
:
...
...
@@ -107,15 +116,12 @@ def urljoin(base, url, allow_framents = 1):
return
urlunparse
((
scheme
,
netloc
,
path
,
params
,
query
,
fragment
))
if
not
path
:
path
=
bpath
if
not
query
:
query
=
bquery
return
urlunparse
((
scheme
,
netloc
,
path
,
params
,
query
,
fragment
))
i
=
string
.
rfind
(
bpath
,
'/'
)
return
urlunparse
((
scheme
,
netloc
,
bpath
,
params
,
query
or
bquery
,
fragment
))
i
=
rfind
(
bpath
,
'/'
)
if
i
>=
0
:
path
=
bpath
[:
i
]
+
'/'
+
path
segments
=
s
tring
.
s
plitfields
(
path
,
'/'
)
segments
=
splitfields
(
path
,
'/'
)
if
segments
[
-
1
]
==
'.'
:
segments
[
-
1
]
=
''
while
'.'
in
segments
:
...
...
@@ -132,10 +138,21 @@ def urljoin(base, url, allow_framents = 1):
break
if
len
(
segments
)
>=
2
and
segments
[
-
1
]
==
'..'
:
segments
[
-
2
:]
=
[
''
]
path
=
string
.
joinfields
(
segments
,
'/'
)
return
urlunparse
((
scheme
,
netloc
,
path
,
return
urlunparse
((
scheme
,
netloc
,
joinfields
(
segments
,
'/'
),
params
,
query
,
fragment
))
def
urldefrag
(
url
):
"""Removes any existing fragment from URL.
Returns a tuple of the defragmented URL and the fragment. If
the URL contained no fragments, the second element is the
empty string.
"""
s
,
n
,
p
,
a
,
q
,
frag
=
urlparse
(
url
)
defrag
=
urlunparse
((
s
,
n
,
p
,
a
,
q
,
''
))
return
defrag
,
frag
test_input
=
"""
http://a/b/c/d
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment