Skip to content

Commit 3c4df36

Browse files
authored
Integrate flaky, slow, stop and partition test cases (#2751)
* Must wait for the cluster to converge before starting tests.
* Blockade tool works nicely. Tested 9 combinations of partitions; all work OK.
* Testing all 4 edge cases: flaky, slow, stop, and partition.
1 parent 50b0484 commit 3c4df36

File tree

3 files changed

+111
-36
lines changed

3 files changed

+111
-36
lines changed

contrib/blockade/blockade.yml

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -17,33 +17,33 @@ containers:
1717
expose:
1818
- 5080
1919
- 6080
20-
command: /gobin/dgraph zero --my=zero1:5080 --replicas 3 --idx 1 --bindall --expose_trace --profile_mode block --block_rate 10 --logtostderr -v=2
20+
command: /gobin/dgraph zero --my=zero1:5080 --replicas 3 --idx 1 --bindall --expose_trace --logtostderr -v=2
2121
volumes: {"/home/mrjn/go/bin": "/gobin"}
2222

2323
zero2:
2424
image: dgraph/dgraph:latest
2525
hostname: "zero2"
2626
container_name: "zero2"
2727
ports:
28-
- 5081
29-
- 6081
28+
- 5082
29+
- 6082
3030
expose:
31-
- 5081
32-
- 6081
33-
command: /gobin/dgraph zero -o 1 --my=zero2:5081 --replicas 3 --peer=zero1:5080 --idx 2 --bindall --expose_trace --profile_mode block --block_rate 10 --logtostderr -v=2
31+
- 5082
32+
- 6082
33+
command: /gobin/dgraph zero -o 2 --my=zero2:5082 --replicas 3 --peer=zero1:5080 --idx 2 --bindall --expose_trace --logtostderr -v=2
3434
volumes: {"/home/mrjn/go/bin": "/gobin"}
3535

3636
zero3:
3737
image: dgraph/dgraph:latest
3838
hostname: "zero3"
3939
container_name: "zero3"
4040
ports:
41-
- 5082
42-
- 6082
41+
- 5083
42+
- 6083
4343
expose:
44-
- 5082
45-
- 6082
46-
command: /gobin/dgraph zero -o 2 --my=zero3:5082 --replicas 3 --peer=zero1:5080 --idx 3 --bindall --expose_trace --profile_mode block --block_rate 10 --logtostderr -v=2
44+
- 5083
45+
- 6083
46+
command: /gobin/dgraph zero -o 3 --my=zero3:5083 --replicas 3 --peer=zero1:5080 --idx 3 --bindall --expose_trace --logtostderr -v=2
4747
volumes:
4848
"/home/mrjn/go/bin": "/gobin"
4949

contrib/blockade/main.go

Lines changed: 97 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,11 @@ package main
33
import (
44
"bytes"
55
"context"
6+
"encoding/json"
67
"fmt"
78
"log"
89
"math/rand"
10+
"os"
911
"os/exec"
1012
"strings"
1113
"time"
@@ -20,10 +22,10 @@ func run(ctx context.Context, command string) error {
2022
cmd.Stdout = &out
2123
cmd.Stderr = &out
2224
if err := cmd.Run(); err != nil {
23-
fmt.Printf("ERROR. Command %s. Error: %v. Output:\n%s\n", command, err, out.String())
25+
fmt.Printf("ERROR. Command %q. Error: %v. Output:\n%s\n", command, err, out.String())
2426
return err
2527
}
26-
fmt.Printf("Command %s. Output:\n%s\n", command, out.String())
28+
fmt.Printf("Command %q. Output:\n%s\n", command, out.String())
2729
return nil
2830
}
2931

@@ -49,65 +51,138 @@ func increment(atLeast int) error {
4951
return err
5052
}
5153
}
52-
fmt.Printf("Time taken to converge %d: %s\n",
53-
atLeast, time.Since(start).Round(time.Millisecond))
54+
dur := time.Since(start).Round(time.Millisecond)
55+
fmt.Printf("\n===> TIME taken to converge %d alphas: %s\n\n", atLeast, dur)
5456
return nil
5557
}
5658

57-
func testPartitions() error {
59+
func getStatus(zero string) error {
60+
cmd := exec.Command("http", "GET", fmt.Sprintf("%s/state", zero))
61+
var out bytes.Buffer
62+
cmd.Stdout = &out
63+
cmd.Stderr = &out
64+
if err := cmd.Run(); err != nil {
65+
fmt.Printf("ERROR. Status at %s. Error: %v. Output:\n%s\n", zero, err, out.String())
66+
return err
67+
}
68+
output := out.String()
69+
if strings.Contains(output, "errors") {
70+
fmt.Printf("ERROR. Status at %s. Output:\n%s\n", zero, output)
71+
return fmt.Errorf(output)
72+
}
73+
var m map[string]interface{}
74+
if err := json.Unmarshal([]byte(output), &m); err != nil {
75+
return err
76+
}
77+
pretty, err := json.MarshalIndent(m, "", " ")
78+
if err != nil {
79+
return err
80+
}
81+
fmt.Printf("Status at %s:\n%s\n", zero, pretty)
82+
return nil
83+
}
84+
85+
func testCommon(remove, join string, minAlphasUp int) error {
5886
var nodes []string
5987
for i := 1; i <= 3; i++ {
6088
for j := 1; j <= 3; j++ {
6189
nodes = append(nodes, fmt.Sprintf("zero%d dg%d", i, j))
6290
}
6391
}
6492

65-
fmt.Printf("Nodes: %v\n", nodes)
93+
fmt.Printf("Nodes: %+v\n", nodes)
6694
for _, node := range nodes {
67-
// First partition.
68-
if err := run(ctxb, "http GET localhost:6080/state"); err != nil {
95+
if err := getStatus("localhost:6080"); err != nil {
6996
return err
7097
}
71-
fmt.Printf("\n==> Partitioning NODE: %s\n", node)
72-
if err := partition(node); err != nil {
98+
fmt.Printf("\n==> Remove cmd %q on NODES: %s\n", remove, node)
99+
if err := run(ctxb, remove+" "+node); err != nil {
73100
return err
74101
}
75102
if err := run(ctxb, "blockade status"); err != nil {
76103
return err
77104
}
78-
if err := increment(2); err != nil {
105+
if err := increment(minAlphasUp); err != nil {
79106
return err
80107
}
81108
// Then join.
82-
if err := run(ctxb, "blockade join"); err != nil {
109+
if err := run(ctxb, join); err != nil {
83110
return err
84111
}
85112
if err := increment(3); err != nil {
86113
return err
87114
}
88115
}
89-
fmt.Println("testPartitions: OK")
90116
return nil
91117
}
92118

93-
func main() {
94-
rand.Seed(time.Now().UnixNano())
95-
fmt.Println("Starting blockade")
96-
if err := run(ctxb, "blockade up"); err != nil {
97-
log.Fatal(err)
119+
func waitForHealthy() error {
120+
for _, zero := range []string{"localhost:6080", "localhost:6082", "localhost:6083"} {
121+
if err := getStatus(zero); err != nil {
122+
return err
123+
}
124+
}
125+
for _, alpha := range []string{"localhost:9180", "localhost:9182", "localhost:9183"} {
126+
if err := run(ctxb, "increment --addr="+alpha); err != nil {
127+
return err
128+
}
98129
}
130+
return nil
131+
}
99132

133+
func runTests() error {
100134
defer func() {
101135
if err := run(ctxb, "blockade destroy"); err != nil {
102136
log.Fatalf("While destroying: %v", err)
103137
}
104138
}()
105-
if err := run(ctxb,
106-
"increment --addr=localhost:9180"); err != nil {
107-
fmt.Printf("Error during increment: %v\n", err)
139+
140+
for {
141+
if err := waitForHealthy(); err != nil {
142+
fmt.Printf("Error while waitForHealthy: %v\n.", err)
143+
time.Sleep(5 * time.Second)
144+
fmt.Println("Retrying...")
145+
} else {
146+
break
147+
}
148+
}
149+
150+
// Setting flaky --all just does not converge. Too many network interruptions.
151+
if err := testCommon("blockade flaky", "blockade fast --all", 3); err != nil {
152+
fmt.Printf("Error testFlaky: %v\n", err)
153+
return err
154+
}
155+
fmt.Println("===> Flaky TEST: OK")
156+
157+
if err := testCommon("blockade slow", "blockade fast --all", 3); err != nil {
158+
fmt.Printf("Error testSlow: %v\n", err)
159+
return err
160+
}
161+
fmt.Println("===> Slow TEST: OK")
162+
163+
if err := testCommon("blockade stop", "blockade start --all", 2); err != nil {
164+
fmt.Printf("Error testRestart: %v\n", err)
165+
return err
108166
}
167+
fmt.Println("===> Restart TEST: OK")
109168

110-
if err := testPartitions(); err != nil {
169+
if err := testCommon("blockade partition", "blockade join", 2); err != nil {
111170
fmt.Printf("Error testPartitions: %v\n", err)
171+
return err
172+
}
173+
fmt.Println("===> Partition TEST: OK")
174+
175+
return nil
176+
}
177+
178+
func main() {
179+
rand.Seed(time.Now().UnixNano())
180+
fmt.Println("Starting blockade")
181+
if err := run(ctxb, "blockade up"); err != nil {
182+
log.Fatal(err)
183+
}
184+
if err := runTests(); err != nil {
185+
os.Exit(1)
112186
}
187+
fmt.Println("Blockade tests: OK")
113188
}

contrib/integration/increment/main.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -117,13 +117,13 @@ func main() {
117117

118118
for *num > 0 {
119119
cnt, err := process(dg, *ro)
120+
now := time.Now().UTC().Format("0102 03:04:05.999")
120121
if err != nil {
121-
fmt.Printf("While trying to process counter: %v. Retrying...\n", err)
122+
fmt.Printf("%-17s While trying to process counter: %v. Retrying...\n", now, err)
122123
time.Sleep(time.Second)
123124
continue
124125
}
125-
fmt.Printf("%-17s Counter VAL: %d [ Ts: %d ]\n",
126-
time.Now().UTC().Format("0102 03:04:05.999"), cnt.Val, cnt.startTs)
126+
fmt.Printf("%-17s Counter VAL: %d [ Ts: %d ]\n", now, cnt.Val, cnt.startTs)
127127
*num -= 1
128128
time.Sleep(waitDur)
129129
}

0 commit comments

Comments
 (0)