Skip to content

Commit 148082d

Browse files
committed
TLA+ model based trace validation for etcd
1 parent 6061d6c commit 148082d

File tree

7 files changed

+3542
-0
lines changed

7 files changed

+3542
-0
lines changed

node.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -440,6 +440,8 @@ func (n *node) run() {
440440
rd = Ready{}
441441
}
442442
readyc = nil
443+
444+
traceNodeEvent(rsmReady, r)
443445
case <-advancec:
444446
n.rn.Advance(rd)
445447
rd = Ready{}

raft.go

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -283,6 +283,9 @@ type Config struct {
283283
// This behavior will become unconditional in the future. See:
284284
// https://github.com/etcd-io/raft/issues/83
285285
StepDownOnRemoval bool
286+
287+
// raft state tracer
288+
TraceLogger TraceLogger
286289
}
287290

288291
func (c *Config) validate() error {
@@ -427,6 +430,9 @@ type raft struct {
427430
// current term. Those will be handled as fast as first log is committed in
428431
// current term.
429432
pendingReadIndexMessages []pb.Message
433+
434+
traceLogger TraceLogger
435+
initStateTraced bool
430436
}
431437

432438
func newRaft(c *Config) *raft {
@@ -456,6 +462,7 @@ func newRaft(c *Config) *raft {
456462
disableProposalForwarding: c.DisableProposalForwarding,
457463
disableConfChangeValidation: c.DisableConfChangeValidation,
458464
stepDownOnRemoval: c.StepDownOnRemoval,
465+
traceLogger: c.TraceLogger,
459466
}
460467

461468
cfg, prs, err := confchange.Restore(confchange.Changer{
@@ -578,11 +585,13 @@ func (r *raft) send(m pb.Message) {
578585
// we err on the side of safety and omit a `&& !m.Reject` condition
579586
// above.
580587
r.msgsAfterAppend = append(r.msgsAfterAppend, m)
588+
traceSendMessage(r, &m)
581589
} else {
582590
if m.To == r.id {
583591
r.logger.Panicf("message should not be self-addressed when sending %s", m.Type)
584592
}
585593
r.msgs = append(r.msgs, m)
594+
traceSendMessage(r, &m)
586595
}
587596
}
588597

@@ -753,6 +762,8 @@ func (r *raft) appliedSnap(snap *pb.Snapshot) {
753762
// the commit index changed (in which case the caller should call
754763
// r.bcastAppend).
755764
func (r *raft) maybeCommit() bool {
765+
defer traceNodeEvent(rsmCommit, r)
766+
756767
mci := r.prs.Committed()
757768
return r.raftLog.maybeCommit(mci, r.Term)
758769
}
@@ -793,6 +804,9 @@ func (r *raft) appendEntry(es ...pb.Entry) (accepted bool) {
793804
for i := range es {
794805
es[i].Term = r.Term
795806
es[i].Index = li + 1 + uint64(i)
807+
if es[i].Type == pb.EntryNormal {
808+
traceNodeEvent(rsmReplicate, r)
809+
}
796810
}
797811
// Track the size of this uncommitted proposal.
798812
if !r.increaseUncommittedSize(es) {
@@ -868,6 +882,8 @@ func (r *raft) becomeFollower(term uint64, lead uint64) {
868882
r.lead = lead
869883
r.state = StateFollower
870884
r.logger.Infof("%x became follower at term %d", r.id, r.Term)
885+
886+
traceNodeEvent(rsmBecomeFollower, r)
871887
}
872888

873889
func (r *raft) becomeCandidate() {
@@ -881,6 +897,8 @@ func (r *raft) becomeCandidate() {
881897
r.Vote = r.id
882898
r.state = StateCandidate
883899
r.logger.Infof("%x became candidate at term %d", r.id, r.Term)
900+
901+
traceNodeEvent(rsmBecomeCandidate, r)
884902
}
885903

886904
func (r *raft) becomePreCandidate() {
@@ -904,6 +922,7 @@ func (r *raft) becomeLeader() {
904922
if r.state == StateFollower {
905923
panic("invalid transition [follower -> leader]")
906924
}
925+
907926
r.step = stepLeader
908927
r.reset(r.Term)
909928
r.tick = r.tickHeartbeat
@@ -926,6 +945,7 @@ func (r *raft) becomeLeader() {
926945
// could be expensive.
927946
r.pendingConfIndex = r.raftLog.lastIndex()
928947

948+
traceNodeEvent(rsmBecomeLeader, r)
929949
emptyEnt := pb.Entry{Data: nil}
930950
if !r.appendEntry(emptyEnt) {
931951
// This won't happen because we just called reset() above.
@@ -1049,6 +1069,9 @@ func (r *raft) poll(id uint64, t pb.MessageType, v bool) (granted int, rejected
10491069
}
10501070

10511071
func (r *raft) Step(m pb.Message) error {
1072+
traceInitStateOnce(r)
1073+
traceReceiveMessage(r, &m)
1074+
10521075
// Handle the message term, which may result in our stepping down to a follower.
10531076
switch {
10541077
case m.Term == 0:
@@ -1273,6 +1296,8 @@ func stepLeader(r *raft, m pb.Message) error {
12731296
cc = ccc
12741297
}
12751298
if cc != nil {
1299+
traceChangeConfEvent(cc, r)
1300+
12761301
alreadyPending := r.pendingConfIndex > r.raftLog.applied
12771302
alreadyJoint := len(r.prs.Config.Voters[1]) > 0
12781303
wantsLeaveJoint := len(cc.AsV2().Changes) == 0
@@ -1915,6 +1940,8 @@ func (r *raft) applyConfChange(cc pb.ConfChangeV2) pb.ConfState {
19151940
//
19161941
// The inputs usually result from restoring a ConfState or applying a ConfChange.
19171942
func (r *raft) switchToConfig(cfg tracker.Config, prs tracker.ProgressMap) pb.ConfState {
1943+
traceConfChangeEvent(cfg, r)
1944+
19181945
r.prs.Config = cfg
19191946
r.prs.Progress = prs
19201947

0 commit comments

Comments
 (0)