Commit 84def52e authored by Kirill Smelkov's avatar Kirill Smelkov

lib/mem += memdelta

This is a utility function that we will need in the next patch to see how
similar the data of two blocks are to each other.

We use numpy for the implementation because this code will be hot, and if we
don't use optimized C routines, the writeout will become very slow.

Quoting draft patch kirr/wendelin.core@3f631932 :

    -> Also optimize ndelta computation - when done in plain python just
       this part was taking a lot of time as timing for initial writeup
       showed:

         writeup with ZBlk0: ~20-25s
         writeup with ZBlk1: ~20-30s
         writeup with auto:  was ~ 120s

       now, after switching to numpy for ndelta computation, whole runtime
       with 'auto' is taking ~ 35s. The whole runtime, if I observe
       benchmark execution correctly, is dominated by database writeup.

/reviewed-by @levin.zimmermann
/reviewed-on nexedi/wendelin.core!20
parent da765ef7
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Wendelin. Memory helpers # Wendelin. Memory helpers
# Copyright (C) 2014-2015 Nexedi SA and Contributors. # Copyright (C) 2014-2024 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com> # Kirill Smelkov <kirr@nexedi.com>
# #
# This program is free software: you can Use, Study, Modify and Redistribute # This program is free software: you can Use, Study, Modify and Redistribute
...@@ -18,7 +18,7 @@ ...@@ -18,7 +18,7 @@
# #
# See COPYING file for full licensing terms. # See COPYING file for full licensing terms.
# See https://www.nexedi.com/licensing for rationale and options. # See https://www.nexedi.com/licensing for rationale and options.
from numpy import ndarray, uint8, copyto from numpy import ndarray, uint8, copyto, count_nonzero
# zero buffer memory # zero buffer memory
...@@ -42,3 +42,15 @@ def memcpy(dst, src): ...@@ -42,3 +42,15 @@ def memcpy(dst, src):
adst = ndarray(l, buffer=dst, dtype=uint8) adst = ndarray(l, buffer=dst, dtype=uint8)
asrc = ndarray(l, buffer=src, dtype=uint8) asrc = ndarray(l, buffer=src, dtype=uint8)
copyto(adst, asrc) copyto(adst, asrc)
# memdelta returns how many bytes are different in between buf1 and buf2.
#
# buf1 and buf2 are objects that expose the buffer protocol (bytes, bytearray,
# memoryview, ...). They are compared byte by byte over their common prefix;
# if the sizes differ, every byte of the longer tail counts as different.
#
# Sizes are taken in bytes from the underlying buffers, not from len(), so
# buffers whose items are wider than one byte (e.g. array('H')) are compared
# in full instead of only over their first len() bytes.
def memdelta(buf1, buf2):
    # imported locally: frombuffer is needed only by this function
    from numpy import frombuffer

    # flat uint8 views over the whole buffers - no data is copied
    a1 = frombuffer(buf1, dtype=uint8)
    a2 = frombuffer(buf2, dtype=uint8)

    l1 = len(a1)
    l2 = len(a2)
    l = min(l1, l2)
    l_max = max(l1, l2)

    # count differing bytes of the common prefix inside numpy, so that no
    # per-byte work is done in python
    ndiff = count_nonzero(a1[:l] != a2[:l])

    # bytes present in only one of the buffers all count as different
    return (l_max - l) + ndiff
# Copyright (C) 2024 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com>
#
# This program is free software: you can Use, Study, Modify and Redistribute
# it under the terms of the GNU General Public License version 3, or (at your
# option) any later version, as published by the Free Software Foundation.
#
# You can also Link and Combine this program with other software covered by
# the terms of any of the Free Software licenses or any of the Open Source
# Initiative approved licenses and Convey the resulting work. Corresponding
# source of such a combination shall include the source code for all other
# software used.
#
# This program is distributed WITHOUT ANY WARRANTY; without even the implied
# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# See COPYING file for full licensing terms.
# See https://www.nexedi.com/licensing for rationale and options.
from wendelin.lib.mem import memdelta
def test_memdelta():
    # each entry is (buf1, buf2, expected number of differing bytes);
    # the cases cover empty inputs, equal inputs, same-size inputs with
    # in-place differences, and inputs of different sizes.
    testv = [
        (b'',       b'',        0),
        (b'',       b'123',     3),
        (b'ab',     b'',        2),
        (b'abc',    b'abc',     0),
        (b'aXc',    b'aYc',     1),
        (b'aXcZ',   b'aYc',     2),
        (b'aXcZ',   b'aYcZ',    1),
        (b'aXcZ',   b'aYcQ',    2),
        (b'aXcZ',   b'aYcQR',   3),
        (b'aXcZE',  b'aYcQR',   3),
        (b'aXcZEF', b'aYcQR',   4),
        (b'aXcZEF', b'aYcQRS',  4),
        (b'aXcdEF', b'aYcdRS',  3),
        (b'aXcdeF', b'aYcdeS',  2),
        (b'aXcdef', b'aYcdef',  1),
        (b'abcdef', b'abcdef',  0),
    ]
    for buf1, buf2, ndelta_ok in testv:
        assert memdelta(buf1, buf2) == ndelta_ok
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment