Commit 69524821 authored by Serhiy Storchaka, committed by GitHub

bpo-33189: pygettext.py now accepts only literal strings (GH-6364)

as docstrings and translatable strings, and rejects
bytes literals and f-string expressions.
parent b7e1eff8
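In practice, the change means pygettext's -D/--docstrings extraction now treats only plain, raw (r/R) and u/U-prefixed string literals as extractable. A hedged illustration of the user-visible effect (module and function names invented for this example, not part of the patch):

    # sample.py -- illustration only
    def plain():
        """extracted"""        # plain literal: still extracted

    def raw():
        r'''also extracted'''  # r/R and u/U prefixes remain literal strings

    def as_bytes():
        b"""skipped"""         # bytes literal: now rejected

    def formatted():
        f"""skipped"""         # f-string: not a constant, now rejected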
Lib/test/test_tools/test_i18n.py
@@ -3,7 +3,7 @@
 import os
 import sys
 import unittest
-import textwrap
+from textwrap import dedent

 from test.support.script_helper import assert_python_ok
 from test.test_tools import skip_if_missing, toolsdir
@@ -109,9 +109,68 @@ class Test_pygettext(unittest.TestCase):
         # This will raise if the date format does not exactly match.
         datetime.strptime(creationDate, '%Y-%m-%d %H:%M%z')

+    def test_funcdocstring(self):
+        for doc in ('"""doc"""', "r'''doc'''", "R'doc'", 'u"doc"'):
+            with self.subTest(doc):
+                msgids = self.extract_docstrings_from_str(dedent('''\
+                def foo(bar):
+                    %s
+                ''' % doc))
+                self.assertIn('doc', msgids)
+
+    def test_funcdocstring_bytes(self):
+        msgids = self.extract_docstrings_from_str(dedent('''\
+        def foo(bar):
+            b"""doc"""
+        '''))
+        self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])
+
+    def test_funcdocstring_fstring(self):
+        msgids = self.extract_docstrings_from_str(dedent('''\
+        def foo(bar):
+            f"""doc"""
+        '''))
+        self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])
+
+    def test_classdocstring(self):
+        for doc in ('"""doc"""', "r'''doc'''", "R'doc'", 'u"doc"'):
+            with self.subTest(doc):
+                msgids = self.extract_docstrings_from_str(dedent('''\
+                class C:
+                    %s
+                ''' % doc))
+                self.assertIn('doc', msgids)
+
+    def test_classdocstring_bytes(self):
+        msgids = self.extract_docstrings_from_str(dedent('''\
+        class C:
+            b"""doc"""
+        '''))
+        self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])
+
+    def test_classdocstring_fstring(self):
+        msgids = self.extract_docstrings_from_str(dedent('''\
+        class C:
+            f"""doc"""
+        '''))
+        self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])
+
+    def test_msgid(self):
+        msgids = self.extract_docstrings_from_str(
+            '''_("""doc""" r'str' u"ing")''')
+        self.assertIn('docstring', msgids)
+
+    def test_msgid_bytes(self):
+        msgids = self.extract_docstrings_from_str('_(b"""doc""")')
+        self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])
+
+    def test_msgid_fstring(self):
+        msgids = self.extract_docstrings_from_str('_(f"""doc""")')
+        self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])
+
     def test_funcdocstring_annotated_args(self):
         """ Test docstrings for functions with annotated args """
-        msgids = self.extract_docstrings_from_str(textwrap.dedent('''\
+        msgids = self.extract_docstrings_from_str(dedent('''\
         def foo(bar: str):
             """doc"""
         '''))
@@ -119,7 +178,7 @@ class Test_pygettext(unittest.TestCase):
     def test_funcdocstring_annotated_return(self):
         """ Test docstrings for functions with annotated return type """
-        msgids = self.extract_docstrings_from_str(textwrap.dedent('''\
+        msgids = self.extract_docstrings_from_str(dedent('''\
         def foo(bar) -> str:
             """doc"""
         '''))
@@ -127,7 +186,7 @@ class Test_pygettext(unittest.TestCase):
     def test_funcdocstring_defvalue_args(self):
         """ Test docstring for functions with default arg values """
-        msgids = self.extract_docstrings_from_str(textwrap.dedent('''\
+        msgids = self.extract_docstrings_from_str(dedent('''\
         def foo(bar=()):
             """doc"""
         '''))
@@ -137,7 +196,7 @@ class Test_pygettext(unittest.TestCase):
         """ Test docstring extraction for multiple functions combining
         annotated args, annotated return types and default arg values
         """
-        msgids = self.extract_docstrings_from_str(textwrap.dedent('''\
+        msgids = self.extract_docstrings_from_str(dedent('''\
         def foo1(bar: tuple=()) -> str:
             """doc1"""
@@ -155,7 +214,7 @@ class Test_pygettext(unittest.TestCase):
         """ Test docstring extraction for a class with colons occuring within
         the parentheses.
         """
-        msgids = self.extract_docstrings_from_str(textwrap.dedent('''\
+        msgids = self.extract_docstrings_from_str(dedent('''\
         class D(L[1:2], F({1: 2}), metaclass=M(lambda x: x)):
             """doc"""
         '''))
......
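These tests call an extract_docstrings_from_str() helper defined earlier in test_i18n.py, outside the hunks shown above. A minimal sketch of what such a helper can look like, assuming pygettext's -D/--docstrings and -o options (the body below is illustrative, not the actual test code):

    import os
    import re
    from test.support.script_helper import assert_python_ok
    from test.test_tools import toolsdir

    def extract_docstrings_from_str(source):
        # Write the snippet out, run pygettext.py with docstring
        # extraction enabled, then collect every msgid in the catalog.
        script = os.path.join(toolsdir, 'i18n', 'pygettext.py')
        with open('messages.py', 'w') as f:
            f.write(source)
        assert_python_ok(script, '-D', '-o', 'messages.pot', 'messages.py')
        with open('messages.pot') as f:
            return re.findall(r'msgid "(.*)"', f.read())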
Misc/NEWS.d/next/Tools-Demos entry (new file):
:program:`pygettext.py` now recognizes only literal strings as docstrings
and translatable strings, and rejects bytes literals and f-string expressions.
Tools/i18n/pygettext.py
@@ -232,6 +232,10 @@ def escape_nonascii(s, encoding):
     return ''.join(escapes[b] for b in s.encode(encoding))


+def is_literal_string(s):
+    return s[0] in '\'"' or (s[0] in 'rRuU' and s[1] in '\'"')
+
+
 def safe_eval(s):
     # unwrap quotes, safely
     return eval(s, {'__builtins__':{}}, {})
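The new check is purely lexical: it inspects only the first one or two characters of the raw token text, so plain strings and r/R- or u/U-prefixed strings pass, while b and f prefixes (and combined prefixes such as rb) fail. For example:

    >>> is_literal_string('"doc"')
    True
    >>> is_literal_string("R'doc'")
    True
    >>> is_literal_string('b"doc"')
    False
    >>> is_literal_string('f"doc"')
    False
    >>> is_literal_string("rb'doc'")
    False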
@@ -317,8 +321,8 @@ class TokenEater:
     def __call__(self, ttype, tstring, stup, etup, line):
         # dispatch
 ##        import token
-##        print >> sys.stderr, 'ttype:', token.tok_name[ttype], \
-##              'tstring:', tstring
+##        print('ttype:', token.tok_name[ttype], 'tstring:', tstring,
+##              file=sys.stderr)
         self.__state(ttype, tstring, stup[0])

     def __waiting(self, ttype, tstring, lineno):
...@@ -327,7 +331,7 @@ class TokenEater: ...@@ -327,7 +331,7 @@ class TokenEater:
if opts.docstrings and not opts.nodocstrings.get(self.__curfile): if opts.docstrings and not opts.nodocstrings.get(self.__curfile):
# module docstring? # module docstring?
if self.__freshmodule: if self.__freshmodule:
if ttype == tokenize.STRING: if ttype == tokenize.STRING and is_literal_string(tstring):
self.__addentry(safe_eval(tstring), lineno, isdocstring=1) self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
self.__freshmodule = 0 self.__freshmodule = 0
elif ttype not in (tokenize.COMMENT, tokenize.NL): elif ttype not in (tokenize.COMMENT, tokenize.NL):
@@ -353,7 +357,7 @@ class TokenEater:
     def __suitedocstring(self, ttype, tstring, lineno):
         # ignore any intervening noise
-        if ttype == tokenize.STRING:
+        if ttype == tokenize.STRING and is_literal_string(tstring):
             self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
             self.__state = self.__waiting
         elif ttype not in (tokenize.NEWLINE, tokenize.INDENT,
@@ -378,7 +382,7 @@ class TokenEater:
             if self.__data:
                 self.__addentry(EMPTYSTRING.join(self.__data))
             self.__state = self.__waiting
-        elif ttype == tokenize.STRING:
+        elif ttype == tokenize.STRING and is_literal_string(tstring):
             self.__data.append(safe_eval(tstring))
         elif ttype not in [tokenize.COMMENT, token.INDENT, token.DEDENT,
                            token.NEWLINE, tokenize.NL]:
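Inspecting tstring this way works because tokenize reports each STRING token's raw source text, prefix and quotes included. This holds for the Python versions this patch targets; from Python 3.12 on, f-strings tokenize as separate FSTRING_* tokens instead. A small standalone check:

    import io
    import tokenize

    source = 'f"""doc"""\n'
    for tok in tokenize.generate_tokens(io.StringIO(source).readline):
        if tok.type == tokenize.STRING:
            print(repr(tok.string))  # 'f"""doc"""' -- prefix preserved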
......