From 0a2cd4a5fcbfefcad52d7a4fad7d1ec92e01d4d5 Mon Sep 17 00:00:00 2001 From: Kirill Smelkov <kirr@nexedi.com> Date: Mon, 16 Sep 2024 13:26:26 +0300 Subject: [PATCH] wcfs: tests: Add test to exercies faulty client that does not reply to pin triggered by readPinWatchers Levin writes: This patch extends the test scope of 'test_wcfs_pintimeout_kill'. Before this patch, the test only ensured that a client that does not respond to pin requests during the initial watch request [1] is killed. Now it also tests that a faulty client is killed when a block is invalidated. Since there are no other situations where the WCFS server sends pin requests to a client, the tests now cover all situations where a faulty client might not respond. This patch therefore aims to increase the security that WCFS is not blocked by a faulty client. [1] See https://lab.nexedi.com/nexedi/wendelin.core/-/merge_requests/18 Preliminary history: https://lab.nexedi.com/levin.zimmermann/wendelin.core/-/commit/9d42efff Co-authored-by: Levin Zimmermann <levin.zimmermann@nexedi.com> --- wcfs/wcfs_faultyprot_test.py | 66 ++++++++++++++++++++++++++++++++++-- 1 file changed, 64 insertions(+), 2 deletions(-) diff --git a/wcfs/wcfs_faultyprot_test.py b/wcfs/wcfs_faultyprot_test.py index c30c0aff..b104c890 100644 --- a/wcfs/wcfs_faultyprot_test.py +++ b/wcfs/wcfs_faultyprot_test.py @@ -214,7 +214,10 @@ class tFaultyClient: # ---- tests ---- -# verify that wcfs kills slow/faulty client who does not reply to pin in time. +# verify that wcfs kills slow/faulty client who does not reply to pin +# notifications in time during watch setup. +# +# This verifies setupWatch codepath. @func def _bad_watch_no_pin_reply(ctx, f, at): @@ -242,7 +245,7 @@ def _bad_watch_no_pin_reply(ctx, f, at): @xfail # protection against faulty/slow clients @func -def test_wcfs_pintimeout_kill(with_prompt_pintimeout): +def test_wcfs_pintimeout_kill_on_watch(with_prompt_pintimeout): t = tDB(multiproc=True); zf = t.zfile defer(t.close) @@ -268,6 +271,65 @@ def test_wcfs_pintimeout_kill(with_prompt_pintimeout): assert p.exitcode is not None +# verify that wcfs kills slow/faulty client who does not reply to pin +# notifications in time caused by asynchronous read access. +# +# This verifies readPinWatchers codepath. + +@func +def _bad_pinh_no_pin_reply(ctx, f, at): + wl = wcfs.WatchLink(f.wc) ; defer(wl.close) + + # initial watch setup goes ok + _ = wl.sendReq(ctx, b"watch %s @%s" % (h(f.zfile_oid), h(at))) + assert _ == b"ok", _ + f.cout.send("f: watch setup ok") + + # wait for "pin ..." due to read access in the parent + req = wl.recvReq(ctx) + assert req is not None + f.cout.send(req.msg) + + # sleep > wcfs pin timeout - wcfs must kill us + f.assertKilled(ctx, "wcfs did not kill stuck client") + +@xfail # protection against faulty/slow clients +@func +def test_wcfs_pintimeout_kill_on_access(with_prompt_pintimeout): + t = tDB(multiproc=True); zf = t.zfile; at0=t.at0 + defer(t.close) + + at1 = t.commit(zf, {2:'c1'}) + at2 = t.commit(zf, {2:'c2'}) + f = t.open(zf) + f.assertData(['','','c2']) + + # issue our watch request - it should be served well + wl = t.openwatch() + wl.watch(zf, at1, {2:at1}) + + # spawn faulty client and wait until it setups its watch + p = tFaultySubProcess(t, _bad_pinh_no_pin_reply, at=at2) + defer(p.close) + assert p.recv(t.ctx) == "f: watch setup ok" + + # commit new transaction and issue read access to modified block + # our read should be served well even though faulty client is stuck. + # As the result the faulty client should be killed by wcfs. + at3 = t.commit(zf, {1:'b3'}) + wg = sync.WorkGroup(t.ctx) + def _(ctx): + f.assertBlk(1, 'b3', {wl: {1:at0}}, timeout=2*t.pintimeout) + wg.go(_) + def _(ctx): + assert p.recv(ctx) == b"pin %s #%d @%s" % (h(zf._p_oid), 1, h(at0)) + wg.go(_) + wg.wait() + + p.join(t.ctx) + assert p.exitcode is not None + + # assertKilled assert that the current process becomes killed after time goes after pintimeout. @func(tFaultyClient) def assertKilled(f, ctx, failmsg): -- 2.30.9