Commit 69aab23a authored by Kirill Smelkov

wcfs: client: Adjust Cython part to accept both bytes and str input, and yield bstr output

wcfs/client/_wcfs.pyx provides a Cython wrapper over the C++ WCFS client, which
works with bytes-based std::string messages. On py2 everything works ok,
but on py3 this makes it reject str given as an input argument, e.g. as follows:

    ```python
    _____________________________ test_join_autostart ______________________________

        @func
        def test_join_autostart():
            zurl = testzurl
            with raises(RuntimeError, match="wcfs: join .*: server not running"):
                wcfs.join(zurl, autostart=False)

            assert wcfs._wcregistry == {}
            def _():
                assert wcfs._wcregistry == {}
            defer(_)

    >       wc = wcfs.join(zurl, autostart=True)

    wcfs/wcfs_test.py:164:
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
    wcfs/__init__.py:225: in join
        wc = WCFS(mntpt, fwcfs, wcsrv)
    ../../venvs/wendelin.core/lib/python3.9/site-packages/decorator.py:232: in fun
        return caller(func, *(extras + args), **kw)
    ../pygolang/golang/__init__.py:125: in _
        return f(*argv, **kw)
    wcfs/__init__.py:167: in __init__
        wc.mountpoint = mountpoint
    wcfs/client/_wcfs.pyx:44: in wendelin.wcfs.client._wcfs.PyWCFS.mountpoint.__set__
        def __set__(PyWCFS pywc, string v):
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

    >   ???
    E   TypeError: expected bytes, str found
    ```

because by default Cython maps std::string to bytes on the Python side.

-> Fix it by accepting both str and bytes as input for all method
   arguments related to strings.

For returned strings, take care to return them as strings rather than as
bytes (to which Cython converts std::string by default), because the calling
code expects returned messages to have string semantics. However, we return
the data as a bytestring, not unicode, because the rest of the test suite also
assumes that messages received from the WCFS server are binary.

NOTE even though it was me who originally suggested in private to use

    cython: c_string_type=str, c_string_encoding=utf8

so that str type is accepted as input, later, when taking a broader
look, I realized that there are two problems with the above directives.
First, the directives affect not only the input: any returned std::string
also becomes unicode instead of bytes/bytestr as before.
However, as explained above, the higher level expects binary semantics from
returned messages. Second, if WCFS sends a message with invalid UTF-8
data, it will result in an exception being thrown on the client instead of
the sent data actually being returned to the caller. This makes debugging more
difficult, and the last thing I want to happen is, when WCFS sends some
garbage, to get a UnicodeDecodeError instead of actually seeing the message
and a higher-level assert saying that the message is unexpected and
providing details.

So do all the in- and out- conversions by hand instead, controlling the
desired semantics ourselves.

On py3 the implementation depends on nexedi/pygolang!21,
but on py2 it works both with and without pygolang bstr patches.

Preliminary history:

    vnmabus/wendelin.core@47c27b03

Co-authored-by: Carlos Ramos Carreño <carlos.ramos@nexedi.com>
parent 2a2b9e30
# -*- coding: utf-8 -*-
# Copyright (C) 2018-2021 Nexedi SA and Contributors.
# Copyright (C) 2018-2024 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com>
#
# This program is free software: you can Use, Study, Modify and Redistribute
......@@ -26,6 +26,7 @@
# See _wcfs.pxd for package overview.
from golang cimport pychan, pyerror, nil
from golang import b as pyb # TODO cimport directly after https://lab.nexedi.com/nexedi/pygolang/-/merge_requests/21 is merged
from golang cimport io
cdef extern from *:
......@@ -40,9 +41,9 @@ cdef class PyWCFS:
property mountpoint:
def __get__(PyWCFS pywc):
return pywc.wc.mountpoint
def __set__(PyWCFS pywc, string v):
pywc.wc.mountpoint = v
return str(pyb(pywc.wc.mountpoint)) # TODO remove str(·) after bstr can be mixed with unicode in os.path.join
def __set__(PyWCFS pywc, v):
pywc.wc.mountpoint = pyb(v)
def connect(PyWCFS pywc, pyat): # -> PyConn
cdef Tid at = u64(pyat)
......@@ -197,7 +198,8 @@ cdef class PyWatchLink:
raise pyerr(err)
def sendReq(PyWatchLink pywlink, context.PyContext pyctx, string req): # -> reply(string)
def sendReq(PyWatchLink pywlink, context.PyContext pyctx, pyreq): # -> reply(bstr)
cdef string req = pyb(pyreq)
with nogil:
_ = wlink_sendReq_pyexc(pywlink.wlink, pyctx.ctx, req)
reply = _.first
......@@ -206,7 +208,7 @@ cdef class PyWatchLink:
if err != nil:
raise pyerr(err)
return reply
return pyb(reply)
def recvReq(PyWatchLink pywlink, context.PyContext pyctx): # -> PinReq | None when EOF
cdef PyPinReq pyreq = PyPinReq.__new__(PyPinReq)
......@@ -220,7 +222,8 @@ cdef class PyWatchLink:
return pyreq
def replyReq(PyWatchLink pywlink, context.PyContext pyctx, PyPinReq pyreq, string reply):
def replyReq(PyWatchLink pywlink, context.PyContext pyctx, PyPinReq pyreq, pyreply):
cdef string reply = pyb(pyreply)
with nogil:
err = wlink_replyReq_pyexc(pywlink.wlink, pyctx.ctx, &pyreq.pinreq, reply)
......@@ -259,11 +262,11 @@ cdef class PyPinReq:
# wcfs_test.py uses req.msg in several places
property msg:
def __get__(PyPinReq pypin):
return pypin.pinreq.msg
return pyb(pypin.pinreq.msg)
def _tpywlinkwrite(PyWatchLink pywlink, bytes pypkt):
cdef string pkt = pypkt
def _tpywlinkwrite(PyWatchLink pywlink, pypkt):
cdef string pkt = pyb(pypkt)
with nogil:
err = _twlinkwrite_pyexc(pywlink.wlink, pkt)
if err != nil:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment