golang_str: Fix bstr/ustr slice access on py2

In the patch "golang_str: bstr/ustr index access" we added a __getitem__ implementation for bstr/ustr together with thorough tests covering all access cases: [i], [i:j] and [i:j:k]. The tests, however, are run via pytest, which does AST rewriting and, as it turned out, always invokes __getitem__ for the [i:j] case, even on py2. This differs from plain python2 behaviour, which invokes __getslice__ for the [i:j] case if the __getslice__ slot is present. Since on py2 both str and unicode provide a __getslice__ implementation, and bstr/ustr inherit from those types, they also inherit __getslice__. And oops, on py2 e.g. bstr[i:j] was then returning str instead of bstr: In [1]: bs = b('αβγ') In [2]: bs Out[2]: b('αβγ') In [3]: bs[0] Out[3]: b(b'\xce') In [4]: bs[0:1] Out[4]: '\xce' <-- NOTE not b(...) In [5]: type(_) Out[5]: str <-- NOTE not bstr -> Fix it by explicitly whiting out the __getslice__ slot for bstr and ustr.

golang_str: Fix bstr/ustr slice access on py2
In the patch "golang_str: bstr/ustr index access" we added a __getitem__ implementation for bstr/ustr together with thorough tests covering all access cases: [i], [i:j] and [i:j:k]. The tests, however, are run via pytest, which does AST rewriting and, as it turned out, always invokes __getitem__ for the [i:j] case, even on py2. This differs from plain python2 behaviour, which invokes __getslice__ for the [i:j] case if the __getslice__ slot is present. Since on py2 both str and unicode provide a __getslice__ implementation, and bstr/ustr inherit from those types, they also inherit __getslice__. And oops, on py2 e.g. bstr[i:j] was then returning str instead of bstr: In [1]: bs = b('αβγ') In [2]: bs Out[2]: b('αβγ') In [3]: bs[0] Out[3]: b(b'\xce') In [4]: bs[0:1] Out[4]: '\xce' <-- NOTE not b(...) In [5]: type(_) Out[5]: str <-- NOTE not bstr -> Fix it by explicitly whiting out the __getslice__ slot for bstr and ustr.
300d7dfa · Kirill Smelkov · 859a55eb · 300d7dfa · 300d7dfa · 300d7dfa
Commit 300d7dfa authored Oct 25, 2022 by Kirill Smelkov
4 changed files
--- a/golang/_golang_str.pyx
+++ b/golang/_golang_str.pyx
@@ -51,6 +51,7 @@ cdef extern from "Python.h":
    ctypedef struct PySequenceMethods:
        binaryfunc sq_concat
        binaryfunc sq_inplace_concat
+        object (*sq_slice) (object, Py_ssize_t, Py_ssize_t)     # present only on py2


 from libc.stdint cimport uint8_t
@@ -928,6 +929,15 @@ IF PY2:
    (<_PyTypeObject_Print*>Py_TYPE(pybstr())) .tp_print = _pybstr_tp_print


+# whiteout .sq_slice for pybstr/pyustr inherited from str/unicode.
+# This way slice access always goes through our __getitem__ implementation.
+# If we don't do this, e.g. bstr[:] will be handled by str.__getslice__ instead
+# of bstr.__getitem__, and will return str instead of bstr.
+if PY2:  # .sq_slice is declared above as present only on py2
+    (<_XPyTypeObject*>pybstr) .tp_as_sequence.sq_slice = NULL
+    (<_XPyTypeObject*>pyustr) .tp_as_sequence.sq_slice = NULL
+
+
 # _bpysmartquote_u3b2 quotes bytes/bytearray s the same way python would do for string.
 #
 # nonascii_escape indicates whether \xNN with NN >= 0x80 is present in the output.

--- a/golang/golang_str_test.py
+++ b/golang/golang_str_test.py
@@ -577,6 +577,19 @@ def test_strings_index():
    assert U.endswith(("α","β","мир")) == True
    assert B.endswith(("α","β","мир")) == True

+def test_strings_index2():
+    # test_strings_index verifies __getitem__ thoroughly. However, on py2
+    # plain python uses __getslice__ for [x:y] access if it is present, while
+    # pytest, because it does AST rewriting, calls __getitem__ instead. This
+    # way [x:y] handling remains untested if verified only via pytest.
+    # -> test it also by running an external program via plain python.
+    outok = readfile(dir_testprog + "/golang_test_str_index2.txt")  # reference output
+    retcode, stdout, stderr = _pyrun(["golang_test_str_index2.py"],
+                                cwd=dir_testprog, stdout=PIPE, stderr=PIPE)
+    assert retcode == 0, (stdout, stderr)
+    assert stderr == b""
+    assertDoc(outok, stdout)
+

 # verify strings iteration.
 def test_strings_iter():

--- a/golang/testprog/golang_test_str_index2.py
+++ b/golang/testprog/golang_test_str_index2.py
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Copyright (C) 2022  Nexedi SA and Contributors.
+#                     Kirill Smelkov <kirr@nexedi.com>
+#
+# This program is free software: you can Use, Study, Modify and Redistribute
+# it under the terms of the GNU General Public License version 3, or (at your
+# option) any later version, as published by the Free Software Foundation.
+#
+# You can also Link and Combine this program with other software covered by
+# the terms of any of the Free Software licenses or any of the Open Source
+# Initiative approved licenses and Convey the resulting work. Corresponding
+# source of such a combination shall include the source code for all other
+# software used.
+#
+# This program is distributed WITHOUT ANY WARRANTY; without even the implied
+# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See COPYING file for full licensing terms.
+# See https://www.nexedi.com/licensing for rationale and options.
+"""This program helps to verify [:] handling for bstr and ustr.
+
+It complements golang_str_test.test_strings_index2.
+
+It needs to verify [:] only lightly because thorough verification is done in
+test_strings_index, and here we need to verify only that __getslice__, inherited
+from builtin str/unicode, does not get into our way.
+"""
+
+from __future__ import print_function, absolute_import
+
+from golang import b, u
+
+
+def main():  # print repr of a ustr/bstr pair and of several of their [i:j] slices
+    us = u("миру мир")
+    bs = b("миру мир")
+
+    def emit(what, uobj, bobj):  # one "u<what> <repr>" line, then one "b<what> <repr>" line
+        print("u"+what, repr(uobj))
+        print("b"+what, repr(bobj))
+
+    emit("s",       us,        bs)  # the unsliced objects themselves
+    emit("s[:]",    us[:],     bs[:])  # literal [i:j] syntax on purpose: plain py2 uses __getslice__ for it if present
+    emit("s[0:1]",  us[0:1],   bs[0:1])
+    emit("s[0:2]",  us[0:2],   bs[0:2])
+    emit("s[1:2]",  us[1:2],   bs[1:2])
+    emit("s[0:-1]", us[0:-1],  bs[0:-1])  # negative upper bound
+
+if __name__ == '__main__':
+    main()
--- a/golang/testprog/golang_test_str_index2.txt
+++ b/golang/testprog/golang_test_str_index2.txt
+us u('миру мир')
+bs b('миру мир')
+us[:] u('миру мир')
+bs[:] b('миру мир')
+us[0:1] u('м')
+bs[0:1] b(b'\xd0')
+us[0:2] u('ми')
+bs[0:2] b('м')
+us[1:2] u('и')
+bs[1:2] b(b'\xbc')
+us[0:-1] u('миру ми')
+bs[0:-1] b(b'миру ми\xd1')