Skip to content

Commit bec18bc

Browse files
committed
Blockade testing
Fix an issue in retrieveSnapshot, where the membership updates from Zero, which carry the info about the Alpha leader, haven't yet rolled in. Instead of relying solely upon those updates, we use the RaftContext in the Snapshot to determine the leader and get the connection. Also, do not destroy blockade in case the tests fail, so that we are able to get the logs from the Dgraph servers.
1 parent 3c4df36 commit bec18bc

File tree

2 files changed

+32
-9
lines changed

2 files changed

+32
-9
lines changed

contrib/blockade/main.go

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -131,12 +131,6 @@ func waitForHealthy() error {
131131
}
132132

133133
func runTests() error {
134-
defer func() {
135-
if err := run(ctxb, "blockade destroy"); err != nil {
136-
log.Fatalf("While destroying: %v", err)
137-
}
138-
}()
139-
140134
for {
141135
if err := waitForHealthy(); err != nil {
142136
fmt.Printf("Error while waitForHealthy: %v\n.", err)
@@ -181,6 +175,15 @@ func main() {
181175
if err := run(ctxb, "blockade up"); err != nil {
182176
log.Fatal(err)
183177
}
178+
// This defer can be moved within runTests, if we want to destroy blockade,
179+
// in case our tests fail. We don't want to do that, because then we won't
180+
// be able to get the logs.
181+
defer func() {
182+
if err := run(ctxb, "blockade destroy"); err != nil {
183+
log.Fatalf("While destroying: %v", err)
184+
}
185+
}()
186+
184187
if err := runTests(); err != nil {
185188
os.Exit(1)
186189
}

worker/draft.go

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -531,9 +531,29 @@ func (n *node) Snapshot() (*pb.Snapshot, error) {
531531
}
532532

533533
func (n *node) retrieveSnapshot(snap pb.Snapshot) error {
534-
pool, err := n.leaderBlocking()
535-
if err != nil {
536-
return err
534+
// In some edge cases, the Zero leader might not have been able to update
535+
// the status of Alpha leader. So, instead of blocking forever on waiting
536+
// for Zero to send us the updated info about the leader, we can just use
537+
// the Snapshot RaftContext, which contains the address of the leader.
538+
var pool *conn.Pool
539+
addr := snap.Context.GetAddr()
540+
glog.V(2).Infof("Snapshot.RaftContext.Addr: %q", addr)
541+
if len(addr) > 0 {
542+
p, err := conn.Get().Get(addr)
543+
if err != nil {
544+
glog.V(2).Infof("conn.Get(%q) Error: %v", addr, err)
545+
} else {
546+
pool = p
547+
glog.V(2).Infof("Leader connection picked from RaftContext")
548+
}
549+
}
550+
if pool == nil {
551+
glog.V(2).Infof("No leader conn from RaftContext. Using membership state.")
552+
p, err := n.leaderBlocking()
553+
if err != nil {
554+
return err
555+
}
556+
pool = p
537557
}
538558

539559
// Need to clear pl's stored in memory for the case when retrieving snapshot with

0 commit comments

Comments
 (0)