@@ -25,6 +25,7 @@ import (
25
25
"math/rand"
26
26
"strings"
27
27
"sync"
28
+ "sync/atomic"
28
29
"time"
29
30
30
31
"github.com/dgraph-io/badger/y"
@@ -35,6 +36,7 @@ import (
35
36
"github.com/golang/glog"
36
37
"go.etcd.io/etcd/raft"
37
38
"go.etcd.io/etcd/raft/raftpb"
39
+ otrace "go.opencensus.io/trace"
38
40
"golang.org/x/net/context"
39
41
)
40
42
@@ -143,7 +145,7 @@ func NewNode(rc *pb.RaftContext, store *raftwal.DiskStorage) *Node {
143
145
confChanges : make (map [uint64 ]chan error ),
144
146
messages : make (chan sendmsg , 100 ),
145
147
peers : make (map [uint64 ]string ),
146
- requestCh : make (chan linReadReq ),
148
+ requestCh : make (chan linReadReq , 100 ),
147
149
}
148
150
n .Applied .Init ()
149
151
// This should match up to the Applied index set above.
@@ -301,12 +303,18 @@ func (n *Node) PastLife() (uint64, bool, error) {
301
303
}
302
304
303
305
const (
	// messageBatchSoftLimit is the payload size, in bytes, past which no more
	// queued messages are appended to a single outgoing batch. It is a soft
	// limit: the check happens before each append, so a batch may exceed it
	// by the size of the last message added.
	messageBatchSoftLimit = 10e6
)
306
308
309
// Stream is the per-peer outgoing message queue together with a liveness flag
// for the goroutine that drains it.
type Stream struct {
	// msgCh holds serialized message batches waiting to be sent to the peer.
	msgCh chan []byte
	// alive must only be read and written through sync/atomic. The sender
	// goroutine stores 0 when it exits; a value <= 0 tells the producer that a
	// fresh Stream and goroutine are needed.
	alive int32
}
313
+
307
314
func (n * Node ) BatchAndSendMessages () {
308
315
batches := make (map [uint64 ]* bytes.Buffer )
309
- failedConn := make (map [uint64 ]bool )
316
+ streams := make (map [uint64 ]* Stream )
317
+
310
318
for {
311
319
totalSize := 0
312
320
sm := <- n .messages
@@ -342,59 +350,106 @@ func (n *Node) BatchAndSendMessages() {
342
350
if buf .Len () == 0 {
343
351
continue
344
352
}
345
-
346
- addr , has := n .Peer (to )
347
- pool , err := Get ().Get (addr )
348
- if ! has || err != nil {
349
- if exists := failedConn [to ]; ! exists {
350
- // So that we print error only the first time we are not able to connect.
351
- // Otherwise, the log is polluted with multiple errors.
352
- glog .Warningf ("No healthy connection to node Id: %#x addr: [%s], err: %v\n " ,
353
- to , addr , err )
354
- failedConn [to ] = true
353
+ stream , ok := streams [to ]
354
+ if ! ok || atomic .LoadInt32 (& stream .alive ) <= 0 {
355
+ stream = & Stream {
356
+ msgCh : make (chan []byte , 100 ),
357
+ alive : 1 ,
355
358
}
356
- continue
359
+ go n .streamMessages (to , stream )
360
+ streams [to ] = stream
357
361
}
358
-
359
- failedConn [to ] = false
360
362
data := make ([]byte , buf .Len ())
361
363
copy (data , buf .Bytes ())
362
- go n .doSendMessage (to , pool , data )
363
364
buf .Reset ()
365
+
366
+ select {
367
+ case stream .msgCh <- data :
368
+ default :
369
+ }
364
370
}
365
371
}
366
372
}
367
373
368
- func (n * Node ) doSendMessage (to uint64 , pool * Pool , data []byte ) {
369
- ctx , cancel := context .WithTimeout (context .Background (), 2 * time .Second )
370
- defer cancel ()
371
-
372
- client := pool .Get ()
373
-
374
- c := pb .NewRaftClient (client )
375
- p := & api.Payload {Data : data }
376
- batch := & pb.RaftBatch {
377
- Context : n .RaftContext ,
378
- Payload : p ,
374
+ func (n * Node ) streamMessages (to uint64 , stream * Stream ) {
375
+ defer atomic .StoreInt32 (& stream .alive , 0 )
376
+
377
+ const dur = 10 * time .Second
378
+ deadline := time .Now ().Add (dur )
379
+ var lastLog time.Time
380
+ // Exit after a thousand tries or at least 10s. Let BatchAndSendMessages create another
381
+ // goroutine, if needed.
382
+ for i := 0 ; ; i ++ {
383
+ if err := n .doSendMessage (to , stream .msgCh ); err != nil {
384
+ // Update lastLog so we print error only a few times if we are not able to connect.
385
+ // Otherwise, the log is polluted with repeated errors.
386
+ if time .Since (lastLog ) > dur {
387
+ glog .Warningf ("Unable to send message to peer: %#x. Error: %v" , to , err )
388
+ }
389
+ lastLog = time .Now ()
390
+ }
391
+ if i >= 1e3 {
392
+ if time .Now ().After (deadline ) {
393
+ return
394
+ }
395
+ i = 0
396
+ }
379
397
}
398
+ }
380
399
381
- // We don't need to run this in a goroutine, because doSendMessage is
382
- // already being run in one.
383
- _ , err := c .RaftMessage (ctx , batch )
400
+ func (n * Node ) doSendMessage (to uint64 , msgCh chan []byte ) error {
401
+ addr , has := n .Peer (to )
402
+ if ! has {
403
+ return x .Errorf ("Do not have address of peer %#x" , to )
404
+ }
405
+ pool , err := Get ().Get (addr )
384
406
if err != nil {
385
- switch {
386
- case strings .Contains (err .Error (), "TransientFailure" ):
387
- glog .Warningf ("Reporting node: %d addr: %s as unreachable." , to , pool .Addr )
388
- n .Raft ().ReportUnreachable (to )
389
- pool .SetUnhealthy ()
390
- default :
391
- glog .V (3 ).Infof ("Error while sending Raft message to node with addr: %s, err: %v\n " ,
392
- pool .Addr , err )
407
+ return err
408
+ }
409
+ c := pb .NewRaftClient (pool .Get ())
410
+ mc , err := c .RaftMessage (context .Background ())
411
+ if err != nil {
412
+ return err
413
+ }
414
+
415
+ slurp := func (batch * pb.RaftBatch ) {
416
+ for {
417
+ if len (batch .Payload .Data ) > messageBatchSoftLimit {
418
+ return
419
+ }
420
+ select {
421
+ case data := <- msgCh :
422
+ batch .Payload .Data = append (batch .Payload .Data , data ... )
423
+ default :
424
+ return
425
+ }
426
+ }
427
+ }
428
+ ctx := mc .Context ()
429
+ for {
430
+ select {
431
+ case data := <- msgCh :
432
+ batch := & pb.RaftBatch {
433
+ Context : n .RaftContext ,
434
+ Payload : & api.Payload {Data : data },
435
+ }
436
+ slurp (batch ) // Pick up more entries from msgCh, if present.
437
+ if err := mc .Send (batch ); err != nil {
438
+ switch {
439
+ case strings .Contains (err .Error (), "TransientFailure" ):
440
+ glog .Warningf ("Reporting node: %d addr: %s as unreachable." , to , pool .Addr )
441
+ n .Raft ().ReportUnreachable (to )
442
+ pool .SetUnhealthy ()
443
+ default :
444
+ }
445
+ // We don't need to do anything if we receive any error while sending message.
446
+ // RAFT would automatically retry.
447
+ return err
448
+ }
449
+ case <- ctx .Done ():
450
+ return ctx .Err ()
393
451
}
394
452
}
395
- // We don't need to do anything if we receive any error while sending message.
396
- // RAFT would automatically retry.
397
- return
398
453
}
399
454
400
455
// Connects the node and makes its peerPool refer to the constructed pool and address
@@ -508,21 +563,29 @@ type linReadReq struct {
508
563
var errReadIndex = x .Errorf ("Cannot get linearized read (time expired or no configured leader)" )
509
564
510
565
func (n * Node ) WaitLinearizableRead (ctx context.Context ) error {
511
- indexCh := make (chan uint64 , 1 )
566
+ span := otrace .FromContext (ctx )
567
+ span .Annotate (nil , "WaitLinearizableRead" )
512
568
569
+ indexCh := make (chan uint64 , 1 )
513
570
select {
514
571
case n .requestCh <- linReadReq {indexCh : indexCh }:
572
+ span .Annotate (nil , "Pushed to requestCh" )
515
573
case <- ctx .Done ():
574
+ span .Annotate (nil , "Context expired" )
516
575
return ctx .Err ()
517
576
}
518
577
519
578
select {
520
579
case index := <- indexCh :
580
+ span .Annotatef (nil , "Received index: %d" , index )
521
581
if index == 0 {
522
582
return errReadIndex
523
583
}
524
- return n .Applied .WaitForMark (ctx , index )
584
+ err := n .Applied .WaitForMark (ctx , index )
585
+ span .Annotatef (nil , "Error from Applied.WaitForMark: %v" , err )
586
+ return err
525
587
case <- ctx .Done ():
588
+ span .Annotate (nil , "Context expired" )
526
589
return ctx .Err ()
527
590
}
528
591
}
@@ -532,7 +595,7 @@ func (n *Node) RunReadIndexLoop(closer *y.Closer, readStateCh <-chan raft.ReadSt
532
595
readIndex := func () (uint64 , error ) {
533
596
// A read request can get rejected, in which case we would wait indefinitely on the channel,
534
597
// so have a timeout.
535
- ctx , cancel := context .WithTimeout (context .Background (), 3 * time .Second )
598
+ ctx , cancel := context .WithTimeout (context .Background (), time .Second )
536
599
defer cancel ()
537
600
538
601
var activeRctx [8 ]byte
@@ -548,6 +611,7 @@ func (n *Node) RunReadIndexLoop(closer *y.Closer, readStateCh <-chan raft.ReadSt
548
611
return 0 , errors .New ("Closer has been called" )
549
612
case rs := <- readStateCh :
550
613
if ! bytes .Equal (activeRctx [:], rs .RequestCtx ) {
614
+ glog .V (1 ).Infof ("Read state: %x != requested %x" , rs .RequestCtx , activeRctx [:])
551
615
goto again
552
616
}
553
617
return rs .Index , nil
0 commit comments