Commit 0b751f74 authored by Kirill Smelkov's avatar Kirill Smelkov

Merge branch 'x/go' into t

* x/go:
  .
  .
  .
  X notes on partition table
  .
  .
  .
  .
  .
  .
parents cb46ccd2 0d0ce246
Wire proto
----------
# all in network byte-order
PktHeader
.id u32 // autoincrement on ask (?)
.code u16 // defined by seqno if register(...) call in Packets
.length u32 // whole pkt length
# response_code = 0x8000 | code
List
.len u32
[len]items
Dict
.len u32
[len] key, value
Enum
.value s32 // None -> -1
String
.len u32
[len] strdata
Address
.host String
.port u16 // present only if .host != ''
Bool
.value u8
Number
.value u32
Index
.value u64
PTID
.value u64 // 0 <-> None
Protocol
Number // encode <- version, decode -> check version
Checksum
.checksum [20]u8
UUID
.uuid s32 // 0 <-> None
TID
.tid [8]byte // None <-> \xff*8
POID = PTID
~~~~~~~~
NodeList []
type NodeType
address Address
uuid UUID
state NodeState
CellList []
uuid UUID
state CellState
RowList []
offset Number // u32
cell_list CellList
HistoryList []
serial TID
size Number // u32
UUIDList []
uuid UUID
TidList []
tid TID
OidList []
oid OID
~~~~~~~~
Notify
.message String
Error
.code Number
.message String
Ping
ø
_answer = PFEmpty
CloseClient
ø
RequestIdentification
.protocol_version PProtocol
.node_type NodeType
.uuid UUID
.address Address
.name String
TODO
Storage related messages
------------------------
NotifyReady S -> M
Recovery PM -> S S -> PM
LastIDs PM -> S S -> PM
PartitionTable PM -> S S -> PM
NotifyPartitionTable PM -> S, C
PartitionChanges PM -> S, C // subset of NotifyPartitionTable (?)
StartOperation PM -> S
StopOperation PM -> S
UnfinishedTransactions S -> PM PM -> S
LockedTransactions PM -> S S -> PM
FinalTID * -> S , C -> PM (?)
ValidateTransaction PM -> S
NotifyTransactionFinished M -> S
LockInformation PM -> S S -> PM
UnlockInformation PM -> S
StoreObject C -> S
.oid OID
.serial TID // original serial
.data String
.data_serial TID
.tid tid // current txn id
.unlock bool
AnswerStoreObject S -> C
.conflicting bool
.oid OID
.serial TID
GetObject C -> S
.oid
.serial
.tid
AnswerGetObject S -> C
.oid OID
.serial_start TID
.serial_end TID
.data String
.data_serial TID
AbortTransaction C -> S, PM
.tid
StoreTransaction C -> S S -> C
.tid TID
user, description, ext String
oidv []OID
VoteTransaction C -> S S -> C
.tid
TIDList C -> S
first, last Index // u64 [first, last)
partition Number // u32
AnswerTIDList S -> C
tidv []TID
TIDListFrom C -> S
min_tid, max_tid TID
length Number
partition Number
AnswerTIDListFrom S -> C
tidv []TID
TransactionInformation * -> S
tid TID
AnswerTransactionInformation S -> *
tid TID
user, description, extension String
packed bool
oidv []OID
ObjectHistory C -> S
oid OID
first, last Index // [first, last]
AnswerObjectHistory
oid OID
historyv []strict{serial TID; size Number}
ObjectUndoSerial C -> S
tid TID
ltid TID // ?
undone_tid TID // ?
oidv []OID
AnswerObjectUndoSerial S -> C
{} oid ->
.current_serial TID
.undo_serial TID
.is_current bool
HasLock C -> S
tid TID
oid OID
AnswerHasLock S -> C
oid OID
lock_state LockState // not_locket, granted, granted_to_other:w
CheckCurrentSerial C -> S AnswerCheckCurrentSerial S -> C
tid TID conflicting bool
serial TID oid OID
oid OID serial TID
Pack C -> M -> S
tid TID
CheckPartition M -> S
partition Number
upstream_name String
address Address
min_tid, max_tid TID
CheckTIDRange S -> S AnswerCheckTIDRange S -> S
partition Number count Number
length Number checksum Checksum
min_tid, max_tid TID max_tid TID
CheckSerialRange S -> S
ParitionCorrupted S -> M
partition Number
uuidv []UUID
~~~~~~~~
(?)
NotifyNodeIntormation
NodeInformation
TODO
Tables
------
- config
.name str
.value str
(name, nid, partitions, ptid, replicas, version, zodb=pickle...)
- pt
.rid int // = row id = part of oid space
.nid int
.state tinyint // = cell state
pkey (rid, nid)
# committed txns
- trans
.partition smallint
.tid bigint
.packed bool
.oids mediumblob // []oid
.user blob
.description blob
.ext blob
.ttid bigint // XXX ?
pkey (partition, tid)
# committed object metadata
- obj
.partition smallint
.oid bigint
.tid bigint
.data_id bigint | NULL // -> data.id
.value_tid bigint | NULL // XXX ? (у нас NULL)
pkey (partition, tid, oid)
key (partition, oid, tid)
key data_id
# object data
- data
.id bigint
.hash sha1 // UNIQUE (hash, compression)
.compression tinyint
.value mediumblob
key id
key (hash, compression) // <- from UNIQUE ^^^
- (bigdata)
# uncommitted transactions
# (= trans)
- ttrans
.partition smallint
.tid bigint
.packed bool
.oids mediumblob // []oid
.user blob
.description blob
.ext blob
ttid bigint // XXX ?
# uncommitted object metadata
# (= obj)
- tobj
Partition Table (general & current-py)
--------------------------------------
#Np (how-many partitions) #R (replication factor)
Cell
.node (-> .uuid, .addr)
.state
.backup_tid # last tid this cell has all data for
.replicating # currently replicating up to this (.replicating) tid
PT
.id↑
.partition_list [#Np] of []Cell
.count_dict {} node -> #node_used_in_pt
Pt
+-+
| |
+-+ +----------+ +------------+ +-----+
| | |node,state| |node2,state2| |cell3| ...
+-+ +----------+ +------------+ +-----+
Np | |
+-+
| |
+-+ oid -> PTentry (as PT[oid % Np]
| | tid
+-+
----------------------------------------
BaseApplication
.em EventManager
.nm NodeManager
......
// XXX license
// filestorage support XXX text
package neo
type FileStorage struct {
fd int
}
// IStorage
var _ IStorage = (*FileStorage)(nil)
type TxnRecHead struct {
Tid Tid
RecLenm8 uint64
Status TxnStatus
//UserLen uint16
//DescriptionLen uint16
//ExtensionLen uint16
User []byte // TODO Encode ^^^
Description []byte
Extension []byte
Datav []DataRec
}
type DataRec struct {
Oid Oid
Tid Tid
PrevDataRecPos uint64 // previous-record file-position
TxnPos uint64 // beginning of transaction record file position
// 2-bytes with zero values. (Was version length.)
//DataLen uint64
Data []byte
DataRecPos uint64 // if Data == nil -> byte position of data record containing data
}
func (TxnRecHead *rh) MarshalFS() []byte {
panic("TODO")
}
func (TxnRecHead *rh) UnmarshalFS(data []byte) {
TODO
}
func NewFileStorage(path string) (*FileStorage, error) {
fd, err := ...Open(path, O_RDONLY)
if err != nil {
return nil, err
}
// TODO read file header
Read(fd, 4) != "FS21" -> invalid header
return &FileStorage{fd: fd}
}
func (f *FileStorage) Close() error {
err := Os.Close(f.fd)
if err != nil {
return err
}
f.fd = -1
return nil
}
func (f *FileStorage) Iterate(start, stop Tid) IStorageIterator {
if start != TID0 || stop != TIDMAX {
panic("TODO start/stop support")
}
}
This diff is collapsed.
// TODO text what it does (generates code for pkt.go)
// +build ignore
package main
import (
"fmt"
"go/ast"
"go/parser"
"go/token"
)
var _ = ast.Print
func main() {
fset := token.NewFileSet()
var mode parser.Mode = 0 // parser.Trace
f, err := parser.ParseFile(fset, "pkt.go", nil, mode)
if err != nil {
panic(err) // XXX log
}
ncode := 0
// ast.Print(fset, f)
// return
for _, decl := range f.Decls {
// we look for types (which can be only under GenDecl)
gdecl, ok := decl.(*ast.GenDecl)
if !ok || gdecl.Tok != token.TYPE {
continue
}
for _, spec := range gdecl.Specs {
tspec := spec.(*ast.TypeSpec) // must be because tok = TYPE
tname := tspec.Name.Name
// we are only interested in struct types
tstruct, ok := tspec.Type.(*ast.StructType)
if !ok {
continue
}
/*
fmt.Printf("%s:\n", tname)
fmt.Println(tstruct)
ast.Print(fset, tstruct)
*/
if ncode != 0 {
fmt.Println()
}
for _, fieldv := range tstruct.Fields.List {
// we only support simple types like uint16
ftype, ok := fieldv.Type.(*ast.Ident)
if !ok {
// TODO log
// TODO proper error message
panic(fmt.Sprintf("%#v not supported", fieldv.Type))
}
if len(fieldv.Names) != 0 {
for _, field := range fieldv.Names {
fmt.Printf("%s(%d).%s\t%s\n", tname, ncode, field.Name, ftype)
}
} else {
// no names means embedding
fmt.Printf("%s(%d).<%s>\n", tname, ncode, ftype)
}
}
ncode++
}
//fmt.Println(gdecl)
//ast.Print(fset, gdecl)
}
}
// XXX license
// ZODB types
package neo // XXX -> zodb ?
// XXX naming -> TID, OID ?
type Tid uint64 // XXX or [8]byte ?
type Oid uint64 // XXX or [8]byte ?
// XXX "extended" oid - oid + serial, completely specifying object revision
type Xid struct {
Tid
Oid
}
const (
INVALID_UUID UUID = 0
INVALID_TID TID = 1<<64 - 1 // 0xffffffffffffffff TODO recheck it is the same
INVALID_OID OID = 0xffffffffffffffff // 1<<64 - 1
ZERO_TID TID = 0 // XXX or simply TID{} ? // XXX -> TID0 ?
ZERO_OID OID = 0 // XXX or simply OID{} ? // XXX -> OID0
// OID_LEN = 8
// TID_LEN = 8
MAX_TID TID = 0x7fffffffffffffff // SQLite does not accept numbers above 2^63-1 // XXX -> TIDMAX ?
)
type TxnStatus byte
// TODO Tid.String(), Oid.String() +verbose, scanning (?)
// Information about single storage transaction
// XXX -> storage.ITransactionInformation
//type IStorageTransactionInformation interface {
type IStorageTransactionInformation struct {
Tid Tid
Status TxnStatus
User []byte
Description []byte
Extension []byte
// TODO iter -> IStorageRecordInformation
Iter() IStorageRecordIterator
}
// Information about single storage record
type StorageRecordInformation struct {
Oid Oid
Tid Tid
Data []byte
// XXX .version ?
// XXX .data_txn (The previous transaction id)
}
type IStorage interface {
Close() error
// TODO:
// Name()
// History(oid, size=1)
// LastTid()
// LoadBefore(oid Oid, beforeTid Tid) (data []bytes, tid Tid, err error)
// LoadSerial(oid Oid, serial Tid) (data []bytes, err error)
// PrefetchBefore(oidv []Oid, beforeTid Tid) error (?)
// Store(oid Oid, serial Tid, data []byte, txn ITransaction) error
// XXX Restore ?
// CheckCurrentSerialInTransaction(oid Oid, serial Tid, txn ITransaction) // XXX naming
// tpc_begin(txn)
// tpc_vote(txn)
// tpc_finish(txn, callback) XXX clarify about callback
// tpc_abort(txn)
Iterate(start, stop Tid) IStorageIterator // XXX -> Iter() ?
}
type IStorageIterator interface {
Next() (*StorageTransactionInformation, error) // XXX -> NextTxn() ?
}
type IStorageRecordIterator interface { // XXX naming -> IRecordIterator
Next() (*StorageRecordInformation, error) // XXX vs ptr & nil ?
// XXX -> NextTxnObject() ?
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment