Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
984f630f
Commit
984f630f
authored
Jan 05, 2014
by
R David Murray
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
#1065986: Make pydoc handle unicode strings.
Patch by Akira Kitada.
parent
1d2ef64d
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
137 additions
and
14 deletions
+137
-14
Lib/pydoc.py
Lib/pydoc.py
+44
-14
Lib/test/test_pydoc.py
Lib/test/test_pydoc.py
+91
-0
Misc/NEWS
Misc/NEWS
+2
-0
No files found.
Lib/pydoc.py
View file @
984f630f
...
...
@@ -81,6 +81,7 @@ def pathdirs():
def
getdoc
(
object
):
"""Get the doc string or comments for an object."""
result
=
inspect
.
getdoc
(
object
)
or
inspect
.
getcomments
(
object
)
result
=
_encode
(
result
)
return
result
and
re
.
sub
(
'^ *
\
n
'
,
''
,
rstrip
(
result
))
or
''
def
splitdoc
(
doc
):
...
...
@@ -182,6 +183,34 @@ def classify_class_attrs(object):
return
name
,
kind
,
cls
,
value
return
map
(
fixup
,
inspect
.
classify_class_attrs
(
object
))
# ----------------------------------------------------- Unicode support helpers
try
:
_unicode
=
unicode
except
NameError
:
# If Python is built without Unicode support, the unicode type
# will not exist. Fake one that nothing will match, and make
# the _encode function do nothing.
class
_unicode
(
object
):
pass
_encoding
=
'ascii'
def
_encode
(
text
,
encoding
=
'ascii'
):
return
text
else
:
import
locale
_encoding
=
locale
.
getpreferredencoding
()
def
_encode
(
text
,
encoding
=
None
):
if
isinstance
(
text
,
unicode
):
return
text
.
encode
(
encoding
or
_encoding
,
'xmlcharrefreplace'
)
else
:
return
text
def
_binstr
(
obj
):
# Ensure that we have an encoded (binary) string representation of obj,
# even if it is a unicode string.
return
obj
.
encode
(
_encoding
)
if
isinstance
(
obj
,
_unicode
)
else
str
(
obj
)
# ----------------------------------------------------- module manipulation
def
ispackage
(
path
):
...
...
@@ -424,12 +453,13 @@ class HTMLDoc(Doc):
def
page
(
self
,
title
,
contents
):
"""Format an HTML page."""
return
'''
return
_encode
(
'''
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<html><head><title>Python: %s</title>
<meta charset="utf-8">
</head><body bgcolor="#f0f0f8">
%s
</body></html>'''
%
(
title
,
contents
)
</body></html>'''
%
(
title
,
contents
)
,
'ascii'
)
def
heading
(
self
,
title
,
fgcol
,
bgcol
,
extras
=
''
):
"""Format a page heading."""
...
...
@@ -606,12 +636,12 @@ class HTMLDoc(Doc):
filelink
=
'(built-in)'
info
=
[]
if
hasattr
(
object
,
'__version__'
):
version
=
str
(
object
.
__version__
)
version
=
_bin
str
(
object
.
__version__
)
if
version
[:
11
]
==
'$'
+
'Revision: '
and
version
[
-
1
:]
==
'$'
:
version
=
strip
(
version
[
11
:
-
1
])
info
.
append
(
'version %s'
%
self
.
escape
(
version
))
if
hasattr
(
object
,
'__date__'
):
info
.
append
(
self
.
escape
(
str
(
object
.
__date__
)))
info
.
append
(
self
.
escape
(
_bin
str
(
object
.
__date__
)))
if
info
:
head
=
head
+
' (%s)'
%
join
(
info
,
', '
)
docloc
=
self
.
getdocloc
(
object
)
...
...
@@ -694,11 +724,11 @@ class HTMLDoc(Doc):
result
=
result
+
self
.
bigsection
(
'Data'
,
'#ffffff'
,
'#55aa55'
,
join
(
contents
,
'<br>
\
n
'
))
if
hasattr
(
object
,
'__author__'
):
contents
=
self
.
markup
(
str
(
object
.
__author__
),
self
.
preformat
)
contents
=
self
.
markup
(
_bin
str
(
object
.
__author__
),
self
.
preformat
)
result
=
result
+
self
.
bigsection
(
'Author'
,
'#ffffff'
,
'#7799ee'
,
contents
)
if
hasattr
(
object
,
'__credits__'
):
contents
=
self
.
markup
(
str
(
object
.
__credits__
),
self
.
preformat
)
contents
=
self
.
markup
(
_bin
str
(
object
.
__credits__
),
self
.
preformat
)
result
=
result
+
self
.
bigsection
(
'Credits'
,
'#ffffff'
,
'#7799ee'
,
contents
)
...
...
@@ -1116,16 +1146,16 @@ class TextDoc(Doc):
result
=
result
+
self
.
section
(
'DATA'
,
join
(
contents
,
'
\
n
'
))
if
hasattr
(
object
,
'__version__'
):
version
=
str
(
object
.
__version__
)
version
=
_bin
str
(
object
.
__version__
)
if
version
[:
11
]
==
'$'
+
'Revision: '
and
version
[
-
1
:]
==
'$'
:
version
=
strip
(
version
[
11
:
-
1
])
result
=
result
+
self
.
section
(
'VERSION'
,
version
)
if
hasattr
(
object
,
'__date__'
):
result
=
result
+
self
.
section
(
'DATE'
,
str
(
object
.
__date__
))
result
=
result
+
self
.
section
(
'DATE'
,
_bin
str
(
object
.
__date__
))
if
hasattr
(
object
,
'__author__'
):
result
=
result
+
self
.
section
(
'AUTHOR'
,
str
(
object
.
__author__
))
result
=
result
+
self
.
section
(
'AUTHOR'
,
_bin
str
(
object
.
__author__
))
if
hasattr
(
object
,
'__credits__'
):
result
=
result
+
self
.
section
(
'CREDITS'
,
str
(
object
.
__credits__
))
result
=
result
+
self
.
section
(
'CREDITS'
,
_bin
str
(
object
.
__credits__
))
return
result
def
docclass
(
self
,
object
,
name
=
None
,
mod
=
None
,
*
ignored
):
...
...
@@ -1375,7 +1405,7 @@ def pipepager(text, cmd):
"""Page through text by feeding it to another program."""
pipe
=
os
.
popen
(
cmd
,
'w'
)
try
:
pipe
.
write
(
text
)
pipe
.
write
(
_encode
(
text
)
)
pipe
.
close
()
except
IOError
:
pass
# Ignore broken pipes caused by quitting the pager program.
...
...
@@ -1385,7 +1415,7 @@ def tempfilepager(text, cmd):
import
tempfile
filename
=
tempfile
.
mktemp
()
file
=
open
(
filename
,
'w'
)
file
.
write
(
text
)
file
.
write
(
_encode
(
text
)
)
file
.
close
()
try
:
os
.
system
(
cmd
+
' "'
+
filename
+
'"'
)
...
...
@@ -1394,7 +1424,7 @@ def tempfilepager(text, cmd):
def
ttypager
(
text
):
"""Page through text on a text terminal."""
lines
=
split
(
plain
(
text
),
'
\
n
'
)
lines
=
plain
(
_encode
(
plain
(
text
),
getattr
(
sys
.
stdout
,
'encoding'
,
_encoding
))).
split
(
'
\
n
'
)
try
:
import
tty
fd
=
sys
.
stdin
.
fileno
()
...
...
@@ -1432,7 +1462,7 @@ def ttypager(text):
def
plainpager
(
text
):
"""Simply print unformatted text. This is the ultimate fallback."""
sys
.
stdout
.
write
(
plain
(
text
))
sys
.
stdout
.
write
(
_encode
(
plain
(
text
),
getattr
(
sys
.
stdout
,
'encoding'
,
_encoding
)
))
def
describe
(
thing
):
"""Produce a short description of the given thing."""
...
...
Lib/test/test_pydoc.py
View file @
984f630f
...
...
@@ -10,6 +10,7 @@ import keyword
import
pkgutil
import
unittest
import
xml.etree
import
types
import
test.test_support
from
collections
import
namedtuple
from
test.script_helper
import
assert_python_ok
...
...
@@ -428,6 +429,95 @@ class TestDescriptions(unittest.TestCase):
self
.
assertIn
(
'_asdict'
,
helptext
)
@
unittest
.
skipUnless
(
test
.
test_support
.
have_unicode
,
"test requires unicode support"
)
class
TestUnicode
(
unittest
.
TestCase
):
def
setUp
(
self
):
# Better not to use unicode escapes in literals, lest the
# parser choke on it if Python has been built without
# unicode support.
self
.
Q
=
types
.
ModuleType
(
'Q'
,
'Rational numbers:
\
xe2
\
x84
\
x9a
'
.
decode
(
'utf8'
))
self
.
Q
.
__version__
=
'
\
xe2
\
x84
\
x9a
'
.
decode
(
'utf8'
)
self
.
Q
.
__date__
=
'
\
xe2
\
x84
\
x9a
'
.
decode
(
'utf8'
)
self
.
Q
.
__author__
=
'
\
xe2
\
x84
\
x9a
'
.
decode
(
'utf8'
)
self
.
Q
.
__credits__
=
'
\
xe2
\
x84
\
x9a
'
.
decode
(
'utf8'
)
self
.
assertIsInstance
(
self
.
Q
.
__doc__
,
unicode
)
def
test_render_doc
(
self
):
# render_doc is robust against unicode in docstrings
doc
=
pydoc
.
render_doc
(
self
.
Q
)
self
.
assertIsInstance
(
doc
,
str
)
def
test_encode
(
self
):
# _encode is robust against characters outside the specified encoding
self
.
assertEqual
(
pydoc
.
_encode
(
self
.
Q
.
__doc__
,
'ascii'
),
'Rational numbers: &#8474;'
)
def
test_pipepager
(
self
):
# pipepager does not choke on unicode
doc
=
pydoc
.
render_doc
(
self
.
Q
)
saved
,
os
.
popen
=
os
.
popen
,
open
try
:
with
test
.
test_support
.
temp_cwd
():
pydoc
.
pipepager
(
doc
,
'pipe'
)
self
.
assertEqual
(
open
(
'pipe'
).
read
(),
pydoc
.
_encode
(
doc
))
finally
:
os
.
popen
=
saved
def
test_tempfilepager
(
self
):
# tempfilepager does not choke on unicode
doc
=
pydoc
.
render_doc
(
self
.
Q
)
output
=
{}
def
mock_system
(
cmd
):
import
ast
output
[
'content'
]
=
open
(
ast
.
literal_eval
(
cmd
.
strip
())).
read
()
saved
,
os
.
system
=
os
.
system
,
mock_system
try
:
pydoc
.
tempfilepager
(
doc
,
''
)
self
.
assertEqual
(
output
[
'content'
],
pydoc
.
_encode
(
doc
))
finally
:
os
.
system
=
saved
def
test_plainpager
(
self
):
# plainpager does not choke on unicode
doc
=
pydoc
.
render_doc
(
self
.
Q
)
# Note: captured_stdout is too permissive when it comes to
# unicode, and using it here would make the test always
# pass.
with
test
.
test_support
.
temp_cwd
():
with
open
(
'output'
,
'w'
)
as
f
:
saved
,
sys
.
stdout
=
sys
.
stdout
,
f
try
:
pydoc
.
plainpager
(
doc
)
finally
:
sys
.
stdout
=
saved
self
.
assertIn
(
'Rational numbers:'
,
open
(
'output'
).
read
())
def
test_ttypager
(
self
):
# ttypager does not choke on unicode
doc
=
pydoc
.
render_doc
(
self
.
Q
)
# Test ttypager
with
test
.
test_support
.
temp_cwd
(),
test
.
test_support
.
captured_stdin
():
with
open
(
'output'
,
'w'
)
as
f
:
saved
,
sys
.
stdout
=
sys
.
stdout
,
f
try
:
pydoc
.
ttypager
(
doc
)
finally
:
sys
.
stdout
=
saved
self
.
assertIn
(
'Rational numbers:'
,
open
(
'output'
).
read
())
def
test_htmlpage
(
self
):
# html.page does not choke on unicode
with
test
.
test_support
.
temp_cwd
():
with
captured_stdout
()
as
output
:
pydoc
.
writedoc
(
self
.
Q
)
self
.
assertEqual
(
output
.
getvalue
(),
'wrote Q.html
\
n
'
)
class
TestHelper
(
unittest
.
TestCase
):
def
test_keywords
(
self
):
self
.
assertEqual
(
sorted
(
pydoc
.
Helper
.
keywords
),
...
...
@@ -456,6 +546,7 @@ def test_main():
test
.
test_support
.
run_unittest
(
PydocDocTest
,
PydocImportTest
,
TestDescriptions
,
TestUnicode
,
TestHelper
)
finally
:
reap_children
()
...
...
Misc/NEWS
View file @
984f630f
...
...
@@ -30,6 +30,8 @@ Core and Builtins
Library
-------
-
Issue
#
1065986
:
pydoc
can
now
handle
unicode
strings
.
-
Issue
#
16039
:
CVE
-
2013
-
1752
:
Change
use
of
readline
in
imaplib
module
to
limit
line
length
.
Patch
by
Emil
Lind
.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment