Commit 878b2787 authored by Kirill Smelkov

X draft loading

Got:

	[21871.340400] INFO: task wcfs:27933 blocked for more than 120 seconds.
	[21871.340414]       Not tainted 4.18.0-1-amd64 #1 Debian 4.18.6-1
	[21871.340420] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
	[21871.340427] wcfs            D    0 27933  27889 0x00000004
	[21871.340440] Call Trace:
	[21871.340463]  ? __schedule+0x2b7/0x880
	[21871.340476]  schedule+0x28/0x80
	[21871.340490]  io_schedule+0x12/0x40
	[21871.340506]  __lock_page+0x115/0x160
	[21871.340521]  ? page_cache_tree_insert+0xe0/0xe0
	[21871.340535]  pagecache_get_page+0x185/0x2a0
	[21871.340549]  ? ilookup5+0x45/0x90
	[21871.340572]  fuse_dev_do_write+0x3c8/0xee0 [fuse]
	[21871.340587]  ? futex_wait_queue_me+0xd3/0x120
	[21871.340604]  fuse_dev_write+0x4e/0x80 [fuse]
	[21871.340622]  do_iter_readv_writev+0x145/0x1a0
	[21871.340632]  do_iter_write+0x80/0x190
	[21871.340642]  vfs_writev+0x98/0x110
	[21871.340657]  ? new_sync_write+0x104/0x160
	[21871.340668]  ? do_writev+0x5e/0xf0
	[21871.340676]  do_writev+0x5e/0xf0
	[21871.340685]  ? ksys_write+0x76/0xc0
	[21871.340699]  do_syscall_64+0x55/0x110
	[21871.340714]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
	[21871.340725] RIP: 0033:0x478b70
	[21871.340730] Code: Bad RIP value.
	[21871.340752] RSP: 002b:000000c000149938 EFLAGS: 00000216 ORIG_RAX: 0000000000000014
	[21871.340763] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 0000000000478b70
	[21871.340770] RDX: 0000000000000002 RSI: 000000c00009e020 RDI: 0000000000000008
	[21871.340777] RBP: 000000c000149988 R08: 0000000000000000 R09: 0000000000000000
	[21871.340784] R10: 0000000000000000 R11: 0000000000000216 R12: 0000000000000004
	[21871.340790] R13: 0000000000000055 R14: 000000000068877a R15: 0000000000000038
parent c295664e
...@@ -250,11 +250,14 @@ package main ...@@ -250,11 +250,14 @@ package main
import ( import (
"context" "context"
"flag" "flag"
"fmt"
"log" "log"
"os" "os"
"sync" "sync"
"syscall" "syscall"
"golang.org/x/sync/errgroup"
"lab.nexedi.com/kirr/go123/xcontext" "lab.nexedi.com/kirr/go123/xcontext"
"lab.nexedi.com/kirr/neo/go/transaction" "lab.nexedi.com/kirr/neo/go/transaction"
...@@ -468,12 +471,52 @@ func (bfdata *BigFileData) Read(_ nodefs.File, dest []byte, off int64, fctx *fus ...@@ -468,12 +471,52 @@ func (bfdata *BigFileData) Read(_ nodefs.File, dest []byte, off int64, fctx *fus
// XXX locking // XXX locking
bf := bfdata.bigfile bf := bfdata.bigfile
zbf := bf.zbf
// XXX better ctx = transaction.PutIntoContext(ctx, txn) // XXX better ctx = transaction.PutIntoContext(ctx, txn)
ctx, cancel := xcontext.Merge(asctx(fctx), bf.txnCtx) ctx, cancel := xcontext.Merge(asctx(fctx), bf.txnCtx)
defer cancel() defer cancel()
blksize := bf.zbf.blksize // widen read request to be aligned with blksize granularity:
end := off + int64(len(dest)) // XXX overflow?
aoff := off - (off % zbf.blksize)
aend := end + (zbf.blksize - (end % zbf.blksize))
dest = make([]byte, aend - aoff)
// load all block(s) in parallel
wg, ctx := errgroup.WithContext(ctx)
for blkoff := aoff; blkoff < aend; blkoff += zbf.blksize {
blkoff := blkoff
blk := blkoff / zbf.blksize
wg.Go(func() error {
blkdata, err := zbf.LoadBlk(ctx, blk)
if err != nil {
return err
}
copy(dest[blkoff - aoff:], blkdata)
// store to kernel pagecache whole block that we've just loaded from database.
// This way, even if the user currently requested to read only small portion from it,
// it will prevent next e.g. consecutive user read request to again hit
// the DB, and instead will be served by kernel from its cache.
st := gfsconn.FileNotifyStoreCache(bfdata.Inode(), blkoff, blkdata)
if st != fuse.OK {
return fmt.Errorf("bigfile %s: blk %d: -> pagecache: %s", zbf.POid(), blk, st)
}
return nil
})
}
err := wg.Wait()
if err != nil {
log.Printf("%s", err) // XXX + /bigfile/XXX: read [a,b): -> ...
return nil, fuse.EIO
}
return fuse.ReadResultData(dest[off-aoff:end - (off-aoff)]), fuse.OK
} }
...@@ -514,6 +557,14 @@ func (cc *zodbCacheControl) WantEvict(obj zodb.IPersistent) bool { ...@@ -514,6 +557,14 @@ func (cc *zodbCacheControl) WantEvict(obj zodb.IPersistent) bool {
// XXX option to prevent starting if wcfs was already started ? // XXX option to prevent starting if wcfs was already started ?
// FIXME gfsconn is tmp workaround for lack of way to retrieve FileSystemConnector from nodefs.Inode
// TODO:
// - Inode += .Mount() -> nodefs.Mount
// - Mount:
// .Root() -> root Inode of the fs
// .Connector() -> FileSystemConnector through which fs is mounted
var gfsconn *nodefs.FileSystemConnector
func main() { func main() {
log.SetPrefix("wcfs: ") log.SetPrefix("wcfs: ")
...@@ -541,10 +592,11 @@ func main() { ...@@ -541,10 +592,11 @@ func main() {
root := nodefs.NewDefaultNode() root := nodefs.NewDefaultNode()
server, _, err := nodefs.MountRoot(mntpt, root, opts) server, fsconn, err := nodefs.MountRoot(mntpt, root, opts)
if err != nil { if err != nil {
log.Fatal(err) log.Fatal(err)
} }
gfsconn = fsconn // FIXME temp workaround (see ^^^)
// add entries to / // add entries to /
mkfile(root, ".wcfs", NewStaticFile([]byte(zurl))) mkfile(root, ".wcfs", NewStaticFile([]byte(zurl)))
......
...@@ -326,7 +326,7 @@ type zBigFileState ZBigFile // hide state methods from public API ...@@ -326,7 +326,7 @@ type zBigFileState ZBigFile // hide state methods from public API
// DropState implements zodb.Stateful. // DropState implements zodb.Stateful.
func (bf *zBigFileState) DropState() { func (bf *zBigFileState) DropState() {
bf.blksize = -1 bf.blksize = 0
bf.blktab = nil bf.blktab = nil
} }
...@@ -350,6 +350,9 @@ func (bf *zBigFileState) PySetState(pystate interface{}) (err error) { ...@@ -350,6 +350,9 @@ func (bf *zBigFileState) PySetState(pystate interface{}) (err error) {
if !ok { if !ok {
return fmt.Errorf("blksize: expect integer; got %s", typeOf(t[0])) return fmt.Errorf("blksize: expect integer; got %s", typeOf(t[0]))
} }
if blksize <= 0 {
return fmt.Errorf("blksize: must be > 0; got %d", blksize)
}
blktab, ok := t[1].(*btree.LOBTree) blktab, ok := t[1].(*btree.LOBTree)
if !ok { if !ok {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment