Commit 5a26fb31 authored by Kirill Smelkov

go/zodb/fs1tools: Verify

Add utility to verify FileStorage data for consistency.
To verify we just need to iterate through all records, because
FileStorage driver performs all consistency checks by itself.

Mimic normal output to be the same as in fstest from ZODB/py.
Example runs of fstest.py and `fs1 verify` on a broken file:

    $ python ~/src/wendelin/z/ZODB/src/ZODB/scripts/fstest.py -v 1.fs
             4: transaction tid 0x03e044f6448c8022 #0
           213: transaction tid 0x03e044f646e044bb #1
    1.fs has data records that extend beyond the transaction record; end at 466

    $ fs1 verify -v 1.fs
             4: transaction tid 0x03e044f6448c8022 #0
           213: transaction tid 0x03e044f646e044bb #1
    2021/05/24 12:43:37 fsverify: 1.fs: 1.fs: transaction record @355: -> (iter data): 1.fs: data record @416: check: data record [..., 466) overlaps txn boundary [..., 458)

As can be seen, in the fs1 case the error contains more details: [start, end) of
both the mismatching transaction and data records.

In addition to fstest-like verbosity, add progress-mode, where % of total
completion is printed in a style similar to one used by `fs1 verify-index`.

The Go-based implementation is also faster even when data is on HDD. For
example on a 73GB database provided by @jerome[1] fsrefs.py takes ~15 minutes
to run and occupies ~70-100% of CPU. On the other hand `fs1 verify` takes ~7
minutes to run and occupies ~20-30% of CPU.

Tests pending.

[1] nexedi/zodbtools!19 (comment 129480)
parent c66dc12d
// Copyright (C) 2017 Nexedi SA and Contributors.
// Kirill Smelkov <kirr@nexedi.com>
// Copyright (C) 2017-2021 Nexedi SA and Contributors.
// Kirill Smelkov <kirr@nexedi.com>
//
// This program is free software: you can Use, Study, Modify and Redistribute
// it under the terms of the GNU General Public License version 3, or (at your
......@@ -30,7 +30,7 @@ var commands = prog.CommandRegistry{
{"reindex", reindexSummary, reindexUsage, reindexMain},
{"verify-index", verifyIdxSummary, verifyIdxUsage, verifyIdxMain},
// recover (fsrecover.py)
// verify (fstest.py)
{"verify", verifySummary, verifyUsage, verifyMain},
// XXX repozo ?
}
......
// Copyright (C) 2021 Nexedi SA and Contributors.
// Kirill Smelkov <kirr@nexedi.com>
//
// This program is free software: you can Use, Study, Modify and Redistribute
// it under the terms of the GNU General Public License version 3, or (at your
// option) any later version, as published by the Free Software Foundation.
//
// You can also Link and Combine this program with other software covered by
// the terms of any of the Free Software licenses or any of the Open Source
// Initiative approved licenses and Convey the resulting work. Corresponding
// source of such a combination shall include the source code for all other
// software used.
//
// This program is distributed WITHOUT ANY WARRANTY; without even the implied
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
//
// See COPYING file for full licensing terms.
// See https://www.nexedi.com/licensing for rationale and options.
package fs1tools
// verify subcommand
//
// verification output mimics fstest from ZODB/py as originally written by Jeremy Hylton:
// https://github.com/zopefoundation/ZODB/blob/5.6.0-35-g1fb097b41/src/ZODB/scripts/fstest.py
import (
"flag"
"fmt"
"io"
"os"
"time"
"lab.nexedi.com/kirr/neo/go/zodb/storage/fs1"
"lab.nexedi.com/kirr/go123/prog"
"lab.nexedi.com/kirr/go123/xflag"
"lab.nexedi.com/kirr/go123/xfmt"
)
// Verify verifies content of a FileStorage file @ path.
//
// Only data part of the database is verified (the *.fs file).
// Use VerifyIndexFor to verify the index part (*.fs.index).
//
// verbose selects how much is reported: >=1 prints transactions, >=2
// additionally prints objects.
//
// If progress is true, a "Verified data bytes: ..." status line is written to
// w after transactions, at most once per 1/4 of a second, and once more
// unconditionally when verification completes successfully.
//
// The error from stat'ing path is wrapped, so callers can inspect its cause
// with errors.Is / errors.As.
func Verify(w io.Writer, path string, verbose int, progress bool) (err error) {
	// just iterate through the file and emit progress.
	// the FileStorage driver implements all consistency checks by itself.
	fi, err := os.Stat(path)
	if err != nil {
		// %w (not %s) keeps the message the same while preserving the
		// error chain for callers.
		return fmt.Errorf("verify: %s: %w", path, err)
	}
	fsize := fi.Size()

	v := &Verifier{verbose: verbose}

	// display progress updates once per tick
	if progress {
		tick := time.NewTicker(time.Second / 4)
		defer tick.Stop()

		// in verbose mode every progress update goes on its own line, so
		// that it does not get mixed with per-transaction output.
		xcr := ""
		if verbose > 0 {
			xcr = "\n"
		}

		v.progress = func(force bool) error {
			if !force {
				// non-blocking check: skip the update unless a tick is pending.
				select {
				case <-tick.C:
				default:
					return nil
				}
			}

			_, err := fmt.Fprintf(w,
				"\rVerified data bytes: %.1f%% (%d/%d); #txn: %d%s",
				100*float64(v.donePos)/float64(fsize),
				v.donePos, fsize,
				v.ntxn,
				xcr)
			return err
		}
	}

	return Dump(w, path, fs1.IterForward, v)
}
// Verifier implements Dumper that is used by Verify.
//
// It carries the state accumulated while the file is walked transaction by
// transaction.
type Verifier struct {
	// output control
	verbose int // >=1 (print txn) >=2 (print objects)

	// iteration state
	ntxn    int   // # of the transaction record being processed; starts from 0
	donePos int64 // verification is done till this position; maintained only when progress != nil

	// data header used as scratch space when loading data of a record
	dhLoading fs1.DataHeader

	// progress, if !nil, is called after each transaction with force=false,
	// and once more with force=true after the whole file was verified ok.
	progress func(force bool) error
}
// DumperName returns the name of this dumper: "fsverify".
func (v *Verifier) DumperName() string {
return "fsverify"
}
// DumpFileHeader emits nothing for the file header and always returns nil.
func (v *Verifier) DumpFileHeader(buf *xfmt.Buffer, fh *fs1.FileHeader) error {
return nil
}
// DumpTxn processes the transaction the iterator currently points to.
//
// All data records of the transaction are iterated and data of every record
// is loaded. Depending on verbosity, one line per object (>=2) and then one
// line for the transaction itself (>=1) are appended to buf - objects go
// first, the transaction line goes last.
//
// Afterwards the transaction counter is advanced and, if progress reporting
// is enabled, donePos is moved to the end of the transaction and the progress
// callback is invoked in non-forced mode.
//
// The first error from iterating, loading or reporting progress is returned.
func (v *Verifier) DumpTxn(buf *xfmt.Buffer, it *fs1.Iter) error {
	nobj := 0 // # of the data record inside current transaction
	for {
		err := it.NextData()
		if err == io.EOF {
			break // no more data records in this transaction
		}
		if err != nil {
			return err
		}

		// load data via a private copy of the header.
		// NOTE(review): presumably LoadData may modify the header it is
		// invoked on, hence the copy - confirm against fs1.
		v.dhLoading = it.Datah
		data, err := v.dhLoading.LoadData(it.R)
		if err != nil {
			return err
		}

		if v.verbose >= 2 {
			buf.S(fmt.Sprintf("%10d: object oid 0x%s #%d\n", it.Datah.Pos, it.Datah.Oid, nobj))
		}

		data.Release()
		nobj++
	}

	if v.verbose >= 1 {
		buf.S(fmt.Sprintf("%10d: transaction tid 0x%s #%d \n", it.Txnh.Pos, it.Txnh.Tid, v.ntxn))
	}
	v.ntxn++

	if v.progress == nil {
		return nil
	}

	v.donePos = it.Txnh.Pos + it.Txnh.Len
	return v.progress( /*force=*/ false)
}
// DumpEndOK finishes a successful verification run.
//
// If progress reporting is enabled, the progress callback is invoked in
// forced mode, so that the final state is displayed regardless of the tick.
// With verbosity >= 1 "no errors detected" is then appended to buf.
func (v *Verifier) DumpEndOK(buf *xfmt.Buffer) error {
	if report := v.progress; report != nil {
		if err := report( /*force=*/ true); err != nil {
			return err
		}
	}

	if v.verbose < 1 {
		return nil
	}

	buf.S("no errors detected\n")
	return nil
}
// ----------------------------------------
// verifySummary is the one-line summary of the verify subcommand.
const verifySummary = "verify database content"
// verifyUsage writes help text of the verify subcommand to w.
func verifyUsage(w io.Writer) {
	const usage = `Usage: fs1 verify [options] <storage>
Verify FileStorage records for consistency
<storage> is a path to FileStorage
options:
-h --help this help text.
-v increase verbosity.
-p display progress.
`
	fmt.Fprint(w, usage)
}
// verifyMain is the entry point of the verify subcommand.
//
// argv[0] is skipped and the rest is parsed for options:
//
//	-v	increase verbosity (may be given several times)
//	-p	display progress
//
// The first positional argument is the path to the storage to verify. If it
// is missing, usage is printed to stderr and the program exits with code 2.
// Verification output goes to stdout; a verification error is reported via
// prog.Fatal.
func verifyMain(argv []string) {
	var (
		verbosity    int
		showProgress bool
	)

	fset := flag.FlagSet{Usage: func() { verifyUsage(os.Stderr) }}
	fset.Init("", flag.ExitOnError)
	fset.Var((*xflag.Count)(&verbosity), "v", "verbosity level")
	fset.BoolVar(&showProgress, "p", false, "display progress")
	fset.Parse(argv[1:])

	args := fset.Args()
	if len(args) < 1 {
		fset.Usage()
		prog.Exit(2)
	}

	if err := Verify(os.Stdout, args[0], verbosity, showProgress); err != nil {
		prog.Fatal(err)
	}
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment