Commit 06cb94bc authored by Serhiy Storchaka, committed by GitHub

bpo-13153: Use OS native encoding for converting between Python and Tcl. (GH-16545)

On Windows use UTF-16 (or UTF-32 for 32-bit Tcl_UniChar) with the
"surrogatepass" error handler for converting to/from Tcl Unicode objects.

On Linux use UTF-8 with the "surrogateescape" error handler for converting
to/from Tcl String objects.

Converting strings from Tcl to Python and back now never fails
(except MemoryError).
parent 2290b23d
...@@ -358,21 +358,6 @@ class EditorWindow(object): ...@@ -358,21 +358,6 @@ class EditorWindow(object):
Font(text, font=text.cget('font')).measure('0') Font(text, font=text.cget('font')).measure('0')
self.width = pixel_width // zero_char_width self.width = pixel_width // zero_char_width
def _filename_to_unicode(self, filename):
"""Return filename as BMP unicode so displayable in Tk."""
# Decode bytes to unicode.
if isinstance(filename, bytes):
try:
filename = filename.decode(self.filesystemencoding)
except UnicodeDecodeError:
try:
filename = filename.decode(self.encoding)
except UnicodeDecodeError:
# byte-to-byte conversion
filename = filename.decode('iso8859-1')
# Replace non-BMP char with diamond questionmark.
return re.sub('[\U00010000-\U0010FFFF]', '\ufffd', filename)
def new_callback(self, event): def new_callback(self, event):
dirname, basename = self.io.defaultfilename() dirname, basename = self.io.defaultfilename()
self.flist.new(dirname) self.flist.new(dirname)
...@@ -963,10 +948,8 @@ class EditorWindow(object): ...@@ -963,10 +948,8 @@ class EditorWindow(object):
menu.delete(0, END) # clear, and rebuild: menu.delete(0, END) # clear, and rebuild:
for i, file_name in enumerate(rf_list): for i, file_name in enumerate(rf_list):
file_name = file_name.rstrip() # zap \n file_name = file_name.rstrip() # zap \n
# make unicode string to display non-ASCII chars correctly
ufile_name = self._filename_to_unicode(file_name)
callback = instance.__recent_file_callback(file_name) callback = instance.__recent_file_callback(file_name)
menu.add_command(label=ulchars[i] + " " + ufile_name, menu.add_command(label=ulchars[i] + " " + file_name,
command=callback, command=callback,
underline=0) underline=0)
...@@ -1004,16 +987,10 @@ class EditorWindow(object): ...@@ -1004,16 +987,10 @@ class EditorWindow(object):
def short_title(self): def short_title(self):
filename = self.io.filename filename = self.io.filename
if filename: return os.path.basename(filename) if filename else "untitled"
filename = os.path.basename(filename)
else:
filename = "untitled"
# return unicode string to display non-ASCII chars correctly
return self._filename_to_unicode(filename)
def long_title(self): def long_title(self):
# return unicode string to display non-ASCII chars correctly return self.io.filename or ""
return self._filename_to_unicode(self.io.filename or "")
def center_insert_event(self, event): def center_insert_event(self, event):
self.center() self.center()
......
...@@ -30,18 +30,6 @@ class EditorWindowTest(unittest.TestCase): ...@@ -30,18 +30,6 @@ class EditorWindowTest(unittest.TestCase):
e._close() e._close()
class EditorFunctionTest(unittest.TestCase):
def test_filename_to_unicode(self):
func = Editor._filename_to_unicode
class dummy():
filesystemencoding = 'utf-8'
pairs = (('abc', 'abc'), ('a\U00011111c', 'a\ufffdc'),
(b'abc', 'abc'), (b'a\xf0\x91\x84\x91c', 'a\ufffdc'))
for inp, out in pairs:
self.assertEqual(func(dummy, inp), out)
class TestGetLineIndent(unittest.TestCase): class TestGetLineIndent(unittest.TestCase):
def test_empty_lines(self): def test_empty_lines(self):
for tabwidth in [1, 2, 4, 6, 8]: for tabwidth in [1, 2, 4, 6, 8]:
......
...@@ -679,14 +679,6 @@ class ModifiedInterpreter(InteractiveInterpreter): ...@@ -679,14 +679,6 @@ class ModifiedInterpreter(InteractiveInterpreter):
self.more = 0 self.more = 0
# at the moment, InteractiveInterpreter expects str # at the moment, InteractiveInterpreter expects str
assert isinstance(source, str) assert isinstance(source, str)
#if isinstance(source, str):
# from idlelib import iomenu
# try:
# source = source.encode(iomenu.encoding)
# except UnicodeError:
# self.tkconsole.resetoutput()
# self.write("Unsupported characters in input\n")
# return
# InteractiveInterpreter.runsource() calls its runcode() method, # InteractiveInterpreter.runsource() calls its runcode() method,
# which is overridden (see below) # which is overridden (see below)
return InteractiveInterpreter.runsource(self, source, filename) return InteractiveInterpreter.runsource(self, source, filename)
...@@ -1298,16 +1290,6 @@ class PyShell(OutputWindow): ...@@ -1298,16 +1290,6 @@ class PyShell(OutputWindow):
self.set_line_and_column() self.set_line_and_column()
def write(self, s, tags=()): def write(self, s, tags=()):
if isinstance(s, str) and len(s) and max(s) > '\uffff':
# Tk doesn't support outputting non-BMP characters
# Let's assume what printed string is not very long,
# find first non-BMP character and construct informative
# UnicodeEncodeError exception.
for start, char in enumerate(s):
if char > '\uffff':
break
raise UnicodeEncodeError("UCS-2", char, start, start+1,
'Non-BMP character not supported in Tk')
try: try:
self.text.mark_gravity("iomark", "right") self.text.mark_gravity("iomark", "right")
count = OutputWindow.write(self, s, tags, "iomark") count = OutputWindow.write(self, s, tags, "iomark")
......
...@@ -147,8 +147,7 @@ class ScriptBinding: ...@@ -147,8 +147,7 @@ class ScriptBinding:
interp = self.shell.interp interp = self.shell.interp
if pyshell.use_subprocess and restart: if pyshell.use_subprocess and restart:
interp.restart_subprocess( interp.restart_subprocess(
with_cwd=False, filename= with_cwd=False, filename=filename)
self.editwin._filename_to_unicode(filename))
dirname = os.path.dirname(filename) dirname = os.path.dirname(filename)
argv = [filename] argv = [filename]
if self.cli_args: if self.cli_args:
......
...@@ -429,9 +429,12 @@ class TclTest(unittest.TestCase): ...@@ -429,9 +429,12 @@ class TclTest(unittest.TestCase):
self.assertEqual(passValue(False), False if self.wantobjects else '0') self.assertEqual(passValue(False), False if self.wantobjects else '0')
self.assertEqual(passValue('string'), 'string') self.assertEqual(passValue('string'), 'string')
self.assertEqual(passValue('string\u20ac'), 'string\u20ac') self.assertEqual(passValue('string\u20ac'), 'string\u20ac')
self.assertEqual(passValue('string\U0001f4bb'), 'string\U0001f4bb')
self.assertEqual(passValue('str\x00ing'), 'str\x00ing') self.assertEqual(passValue('str\x00ing'), 'str\x00ing')
self.assertEqual(passValue('str\x00ing\xbd'), 'str\x00ing\xbd') self.assertEqual(passValue('str\x00ing\xbd'), 'str\x00ing\xbd')
self.assertEqual(passValue('str\x00ing\u20ac'), 'str\x00ing\u20ac') self.assertEqual(passValue('str\x00ing\u20ac'), 'str\x00ing\u20ac')
self.assertEqual(passValue('str\x00ing\U0001f4bb'),
'str\x00ing\U0001f4bb')
self.assertEqual(passValue(b'str\x00ing'), self.assertEqual(passValue(b'str\x00ing'),
b'str\x00ing' if self.wantobjects else 'str\x00ing') b'str\x00ing' if self.wantobjects else 'str\x00ing')
self.assertEqual(passValue(b'str\xc0\x80ing'), self.assertEqual(passValue(b'str\xc0\x80ing'),
...@@ -490,6 +493,7 @@ class TclTest(unittest.TestCase): ...@@ -490,6 +493,7 @@ class TclTest(unittest.TestCase):
check('string') check('string')
check('string\xbd') check('string\xbd')
check('string\u20ac') check('string\u20ac')
check('string\U0001f4bb')
check('') check('')
check(b'string', 'string') check(b'string', 'string')
check(b'string\xe2\x82\xac', 'string\xe2\x82\xac') check(b'string\xe2\x82\xac', 'string\xe2\x82\xac')
...@@ -531,6 +535,7 @@ class TclTest(unittest.TestCase): ...@@ -531,6 +535,7 @@ class TclTest(unittest.TestCase):
('a\n b\t\r c\n ', ('a', 'b', 'c')), ('a\n b\t\r c\n ', ('a', 'b', 'c')),
(b'a\n b\t\r c\n ', ('a', 'b', 'c')), (b'a\n b\t\r c\n ', ('a', 'b', 'c')),
('a \u20ac', ('a', '\u20ac')), ('a \u20ac', ('a', '\u20ac')),
('a \U0001f4bb', ('a', '\U0001f4bb')),
(b'a \xe2\x82\xac', ('a', '\u20ac')), (b'a \xe2\x82\xac', ('a', '\u20ac')),
(b'a\xc0\x80b c\xc0\x80d', ('a\x00b', 'c\x00d')), (b'a\xc0\x80b c\xc0\x80d', ('a\x00b', 'c\x00d')),
('a {b c}', ('a', 'b c')), ('a {b c}', ('a', 'b c')),
......
...@@ -170,6 +170,28 @@ class MiscTest(AbstractTkTest, unittest.TestCase): ...@@ -170,6 +170,28 @@ class MiscTest(AbstractTkTest, unittest.TestCase):
with self.assertRaises(tkinter.TclError): with self.assertRaises(tkinter.TclError):
root.tk.call('after', 'info', idle1) root.tk.call('after', 'info', idle1)
def test_clipboard(self):
root = self.root
root.clipboard_clear()
root.clipboard_append('Ùñî')
self.assertEqual(root.clipboard_get(), 'Ùñî')
root.clipboard_append('çōđě')
self.assertEqual(root.clipboard_get(), 'Ùñîçōđě')
root.clipboard_clear()
with self.assertRaises(tkinter.TclError):
root.clipboard_get()
def test_clipboard_astral(self):
root = self.root
root.clipboard_clear()
root.clipboard_append('𝔘𝔫𝔦')
self.assertEqual(root.clipboard_get(), '𝔘𝔫𝔦')
root.clipboard_append('𝔠𝔬𝔡𝔢')
self.assertEqual(root.clipboard_get(), '𝔘𝔫𝔦𝔠𝔬𝔡𝔢')
root.clipboard_clear()
with self.assertRaises(tkinter.TclError):
root.clipboard_get()
tests_gui = (MiscTest, ) tests_gui = (MiscTest, )
......
...@@ -489,8 +489,7 @@ class ComboboxTest(EntryTest, unittest.TestCase): ...@@ -489,8 +489,7 @@ class ComboboxTest(EntryTest, unittest.TestCase):
expected=('mon', 'tue', 'wed', 'thur')) expected=('mon', 'tue', 'wed', 'thur'))
self.checkParam(self.combo, 'values', ('mon', 'tue', 'wed', 'thur')) self.checkParam(self.combo, 'values', ('mon', 'tue', 'wed', 'thur'))
self.checkParam(self.combo, 'values', (42, 3.14, '', 'any string')) self.checkParam(self.combo, 'values', (42, 3.14, '', 'any string'))
self.checkParam(self.combo, 'values', '', self.checkParam(self.combo, 'values', '')
expected='' if get_tk_patchlevel() < (8, 5, 10) else ())
self.combo['values'] = ['a', 1, 'c'] self.combo['values'] = ['a', 1, 'c']
...@@ -1245,12 +1244,7 @@ class SpinboxTest(EntryTest, unittest.TestCase): ...@@ -1245,12 +1244,7 @@ class SpinboxTest(EntryTest, unittest.TestCase):
expected=('mon', 'tue', 'wed', 'thur')) expected=('mon', 'tue', 'wed', 'thur'))
self.checkParam(self.spin, 'values', ('mon', 'tue', 'wed', 'thur')) self.checkParam(self.spin, 'values', ('mon', 'tue', 'wed', 'thur'))
self.checkParam(self.spin, 'values', (42, 3.14, '', 'any string')) self.checkParam(self.spin, 'values', (42, 3.14, '', 'any string'))
self.checkParam( self.checkParam(self.spin, 'values', '')
self.spin,
'values',
'',
expected='' if get_tk_patchlevel() < (8, 5, 10) else ()
)
self.spin['values'] = ['a', 1, 'c'] self.spin['values'] = ['a', 1, 'c']
...@@ -1308,8 +1302,7 @@ class TreeviewTest(AbstractWidgetTest, unittest.TestCase): ...@@ -1308,8 +1302,7 @@ class TreeviewTest(AbstractWidgetTest, unittest.TestCase):
self.checkParam(widget, 'columns', 'a b c', self.checkParam(widget, 'columns', 'a b c',
expected=('a', 'b', 'c')) expected=('a', 'b', 'c'))
self.checkParam(widget, 'columns', ('a', 'b', 'c')) self.checkParam(widget, 'columns', ('a', 'b', 'c'))
self.checkParam(widget, 'columns', (), self.checkParam(widget, 'columns', '')
expected='' if get_tk_patchlevel() < (8, 5, 10) else ())
def test_displaycolumns(self): def test_displaycolumns(self):
widget = self.create() widget = self.create()
......
OS native encoding is now used for converting between Python strings and
Tcl objects. This makes it possible to display emoji and other non-BMP
characters, and to copy and paste them via the clipboard. Converting
strings from Tcl to Python and back now never fails (except MemoryError).
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment