Commit 758727a4 authored by Kirill Smelkov

golang_str: Switch bstr/ustr to cdef classes

For gpython to switch builtin str/unicode to bstr/ustr we will need
bstr/ustr to have exactly the same C layout as builtin string types.
This is possible to achieve only via `cdef class`. It is also good to
switch to `cdef class` for RAM savings - from https://github.com/cython/cython/pull/5212#issuecomment-1387659026 :

    # what Cython does at runtime for `class MyBytes(bytes)`
    In [3]: MyBytes = type('MyBytes', (bytes,), {'__slots__': ()})

    In [4]: MyBytes
    Out[4]: __main__.MyBytes

    In [5]: a = bytes(b'123')

    In [6]: b = MyBytes(b'123')

    In [7]: a
    Out[7]: b'123'

    In [8]: b
    Out[8]: b'123'

    In [9]: a == b
    Out[9]: True

    In [10]: import sys

    In [11]: sys.getsizeof(a)
    Out[11]: 36

    In [12]: sys.getsizeof(b)
    Out[12]: 52

So with `cdef class` we gain more control and optimize memory usage.

This was not done before because Cython forbids `cdef class X(bytes)` due to
https://github.com/cython/cython/issues/711. We work around it in setup.py with a
draft of the proper patch, pre-posted upstream in https://github.com/cython/cython/pull/5212 .
parent 9a075b17
This diff is collapsed.
...@@ -31,7 +31,7 @@ import sys ...@@ -31,7 +31,7 @@ import sys
import six import six
from six import text_type as unicode, unichr from six import text_type as unicode, unichr
from six.moves import range as xrange from six.moves import range as xrange
import re, pickle, copy, types import gc, re, pickle, copy, types
import array, collections import array, collections
...@@ -284,6 +284,25 @@ def test_strings_basic(): ...@@ -284,6 +284,25 @@ def test_strings_basic():
bs.hello = 1 bs.hello = 1
# verify that bstr/ustr are created with correct refcount.
def test_strings_refcount():
    """Check that freshly created bstr/ustr objects have the expected refcount.

    Each check expects ``sys.getrefcount`` to report ``1+1``: one reference
    held by the local variable plus one for the argument passed to
    ``getrefcount`` itself.  The statements are kept strictly linear because
    any extra helper or container would itself hold references and skew the
    counts being verified.
    """
    # first verify our logic on std type
    obj = xbytes(u'abc')
    assert type(obj) is bytes
    gc.collect()
    assert sys.getrefcount(obj) == 1+1  # +1 due to obj passed to getrefcount call

    # bstr
    obj = b('abc')
    assert type(obj) is bstr
    gc.collect()
    assert sys.getrefcount(obj) == 1+1

    obj = bstr('abc')
    assert type(obj) is bstr
    gc.collect()
    assert sys.getrefcount(obj) == 1+1

    # ustr
    obj = u('abc')
    assert type(obj) is ustr
    gc.collect()
    assert sys.getrefcount(obj) == 1+1

    obj = ustr('abc')
    assert type(obj) is ustr
    gc.collect()
    assert sys.getrefcount(obj) == 1+1
# verify memoryview(bstr|ustr). # verify memoryview(bstr|ustr).
def test_strings_memoryview(): def test_strings_memoryview():
bs = b('мир') bs = b('мир')
......
...@@ -19,6 +19,25 @@ ...@@ -19,6 +19,25 @@
# See COPYING file for full licensing terms. # See COPYING file for full licensing terms.
# See https://www.nexedi.com/licensing for rationale and options. # See https://www.nexedi.com/licensing for rationale and options.
# patch cython to allow `cdef class X(bytes)` while building pygolang to
# workaround https://github.com/cython/cython/issues/711
# see `cdef class pybstr` in golang/_golang_str.pyx for details.
# (should become unneeded with cython 3 once https://github.com/cython/cython/pull/5212 is finished)
import inspect
from Cython.Compiler.PyrexTypes import BuiltinObjectType
def pygo_cy_builtin_type_name_set(self, v):
    """Property setter for ``name``: store the value in ``self._pygo_name``.

    The value is kept under a private attribute so that the matching getter
    can decide what to report on each read.
    """
    self._pygo_name = v
def pygo_cy_builtin_type_name_get(self):
    """Property getter for ``name``: return the stored name, masking 'bytes' when needed.

    Returns ``self._pygo_name`` unchanged, except when the stored name is
    ``'bytes'`` and the immediate Python caller is a function named
    ``analyse_declarations``; in that case ``'xxx'`` is returned instead.
    """
    name = self._pygo_name
    if name != 'bytes':
        return name

    # inspect.currentframe() may return None on interpreters without Python
    # stack frame support, and f_back is None when there is no calling frame;
    # treat both as "not called from analyse_declarations".
    frame = inspect.currentframe()
    caller = frame.f_back if frame is not None else None
    del frame  # do not keep our own frame alive via its locals

    if caller is not None and caller.f_code.co_name == 'analyse_declarations':
        # need anything different from 'bytes' to deactivate check in
        # https://github.com/cython/cython/blob/c21b39d4/Cython/Compiler/Nodes.py#L4759-L4762
        return 'xxx'
    return name
# install the accessors above as the `name` property of BuiltinObjectType, so
# that every read and write of `.name` on its instances goes through them.
BuiltinObjectType.name = property(pygo_cy_builtin_type_name_get, pygo_cy_builtin_type_name_set)
from setuptools import find_packages from setuptools import find_packages
from setuptools.command.install_scripts import install_scripts as _install_scripts from setuptools.command.install_scripts import install_scripts as _install_scripts
from setuptools.command.develop import develop as _develop from setuptools.command.develop import develop as _develop
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment