Skip to content

Commit 4ee50ea

Browse files
committed
lock_resolver: support verifying primary for check_txn_status (tikv#777)
* support verifying primary for check_txn_status Signed-off-by: MyonKeminta <[email protected]> * update kvproto Signed-off-by: MyonKeminta <[email protected]> * add more failpoint usages Signed-off-by: MyonKeminta <[email protected]> * update dependency and fix test Signed-off-by: MyonKeminta <[email protected]> * Do not skip for unistore; refine logs Signed-off-by: MyonKeminta <[email protected]> * Address comments Signed-off-by: MyonKeminta <[email protected]> --------- Signed-off-by: MyonKeminta <[email protected]> Co-authored-by: MyonKeminta <[email protected]>
1 parent d4aab1e commit 4ee50ea

File tree

3 files changed

+150
-1
lines changed

3 files changed

+150
-1
lines changed

integration_tests/lock_test.go

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ import (
5757
"github.com/tikv/client-go/v2/txnkv"
5858
"github.com/tikv/client-go/v2/txnkv/transaction"
5959
"github.com/tikv/client-go/v2/txnkv/txnlock"
60+
"github.com/tikv/client-go/v2/util"
6061
)
6162

6263
var getMaxBackoff = tikv.ConfigProbe{}.GetGetMaxBackoff()
@@ -65,6 +66,10 @@ func TestLock(t *testing.T) {
6566
suite.Run(t, new(testLockSuite))
6667
}
6768

69+
func TestLockWithTiKV(t *testing.T) {
70+
suite.Run(t, new(testLockWithTiKVSuite))
71+
}
72+
6873
type testLockSuite struct {
6974
suite.Suite
7075
store tikv.StoreProbe
@@ -1007,3 +1012,109 @@ func (s *testLockSuite) TestLockWaitTimeLimit() {
10071012
s.Nil(txn1.Rollback())
10081013
s.Nil(txn2.Rollback())
10091014
}
1015+
1016+
type testLockWithTiKVSuite struct {
1017+
suite.Suite
1018+
store tikv.StoreProbe
1019+
}
1020+
1021+
func (s *testLockWithTiKVSuite) SetupTest() {
1022+
if *withTiKV {
1023+
s.store = tikv.StoreProbe{KVStore: NewTestStore(s.T())}
1024+
} else {
1025+
s.store = tikv.StoreProbe{KVStore: NewTestUniStore(s.T())}
1026+
}
1027+
}
1028+
1029+
func (s *testLockWithTiKVSuite) TearDownTest() {
1030+
s.store.Close()
1031+
}
1032+
1033+
func (s *testLockWithTiKVSuite) TestCheckTxnStatusSentToSecondary() {
1034+
s.NoError(failpoint.Enable("tikvclient/beforeAsyncPessimisticRollback", `return("skip")`))
1035+
s.NoError(failpoint.Enable("tikvclient/twoPCRequestBatchSizeLimit", "return"))
1036+
s.NoError(failpoint.Enable("tikvclient/shortPessimisticLockTTL", "return"))
1037+
s.NoError(failpoint.Enable("tikvclient/twoPCShortLockTTL", "return"))
1038+
defer func() {
1039+
s.NoError(failpoint.Disable("tikvclient/beforeAsyncPessimisticRollback"))
1040+
s.NoError(failpoint.Disable("tikvclient/twoPCRequestBatchSizeLimit"))
1041+
s.NoError(failpoint.Disable("tikvclient/shortPessimisticLockTTL"))
1042+
s.NoError(failpoint.Disable("tikvclient/twoPCShortLockTTL"))
1043+
}()
1044+
1045+
k1 := []byte("k1")
1046+
k2 := []byte("k2")
1047+
k3 := []byte("k3")
1048+
1049+
ctx := context.WithValue(context.Background(), util.SessionID, uint64(1))
1050+
1051+
txn, err := s.store.Begin()
1052+
s.NoError(err)
1053+
txn.SetPessimistic(true)
1054+
1055+
// Construct write conflict to make the LockKeys operation fail.
1056+
{
1057+
txn2, err := s.store.Begin()
1058+
s.NoError(err)
1059+
s.NoError(txn2.Set(k3, []byte("v3")))
1060+
s.NoError(txn2.Commit(ctx))
1061+
}
1062+
1063+
lockCtx := kv.NewLockCtx(txn.StartTS(), 200, time.Now())
1064+
err = txn.LockKeys(ctx, lockCtx, k1, k2, k3)
1065+
s.IsType(&tikverr.ErrWriteConflict{}, errors.Cause(err))
1066+
1067+
// At this time: txn's primary is unsetted, and the keys:
1068+
// * k1: stale pessimistic lock, primary
1069+
// * k2: stale pessimistic lock, primary -> k1
1070+
1071+
forUpdateTS, err := s.store.CurrentTimestamp(oracle.GlobalTxnScope)
1072+
s.NoError(err)
1073+
lockCtx = kv.NewLockCtx(forUpdateTS, 200, time.Now())
1074+
err = txn.LockKeys(ctx, lockCtx, k3) // k3 becomes primary
1075+
err = txn.LockKeys(ctx, lockCtx, k1)
1076+
s.Equal(k3, txn.GetCommitter().GetPrimaryKey())
1077+
1078+
// At this time:
1079+
// * k1: pessimistic lock, primary -> k3
1080+
// * k2: stale pessimistic lock, primary -> k1
1081+
// * k3: pessimistic lock, primary
1082+
1083+
s.NoError(txn.Set(k1, []byte("v1-1")))
1084+
s.NoError(txn.Set(k3, []byte("v3-1")))
1085+
1086+
s.NoError(failpoint.Enable("tikvclient/beforeCommitSecondaries", `return("skip")`))
1087+
defer func() {
1088+
s.NoError(failpoint.Disable("tikvclient/beforeCommitSecondaries"))
1089+
}()
1090+
1091+
s.NoError(txn.Commit(ctx))
1092+
1093+
// At this time:
1094+
// * k1: prewritten, primary -> k3
1095+
// * k2: stale pessimistic lock, primary -> k1
1096+
// * k3: committed
1097+
1098+
// Trigger resolving lock on k2
1099+
{
1100+
txn2, err := s.store.Begin()
1101+
s.NoError(err)
1102+
txn2.SetPessimistic(true)
1103+
lockCtx = kv.NewLockCtx(txn2.StartTS(), 200, time.Now())
1104+
s.NoError(txn2.LockKeys(ctx, lockCtx, k2))
1105+
s.NoError(txn2.Rollback())
1106+
}
1107+
1108+
// Check data consistency
1109+
readTS, err := s.store.CurrentTimestamp(oracle.GlobalTxnScope)
1110+
s.NoError(err)
1111+
snapshot := s.store.GetSnapshot(readTS)
1112+
v, err := snapshot.Get(ctx, k3)
1113+
s.NoError(err)
1114+
s.Equal([]byte("v3-1"), v)
1115+
_, err = snapshot.Get(ctx, k2)
1116+
s.Equal(tikverr.ErrNotExist, err)
1117+
v, err = snapshot.Get(ctx, k1)
1118+
s.NoError(err)
1119+
s.Equal([]byte("v1-1"), v)
1120+
}

txnkv/transaction/2pc.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ import (
4040
"encoding/hex"
4141
"math"
4242
"math/rand"
43+
"strconv"
4344
"strings"
4445
"sync"
4546
"sync/atomic"
@@ -1641,6 +1642,20 @@ func (c *twoPhaseCommitter) execute(ctx context.Context) (err error) {
16411642
logutil.Logger(ctx).Info("[failpoint] injected delay before commit",
16421643
zap.Uint64("txnStartTS", c.startTS), zap.Duration("duration", duration))
16431644
time.Sleep(duration)
1645+
} else if strings.HasPrefix(action, "delay(") && strings.HasSuffix(action, ")") {
1646+
durationStr := action[6:]
1647+
durationStr = durationStr[:len(durationStr)-1]
1648+
millis, err := strconv.ParseUint(durationStr, 10, 64)
1649+
if err != nil {
1650+
panic("failed to parse delay duration: " + durationStr)
1651+
}
1652+
duration := time.Millisecond * time.Duration(millis)
1653+
logutil.Logger(ctx).Info("[failpoint] injected delay before commit",
1654+
zap.Uint64("txnStartTS", c.startTS), zap.Duration("duration", duration))
1655+
time.Sleep(duration)
1656+
} else {
1657+
logutil.Logger(ctx).Info("[failpoint] unknown failpoint config",
1658+
zap.Uint64("txnStartTS", c.startTS), zap.String("config", action))
16441659
}
16451660
}
16461661
}

txnkv/txnlock/lock_resolver.go

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -439,7 +439,15 @@ func (lr *LockResolver) resolveLocks(bo *retry.Backoffer, opts ResolveLocksOptio
439439
var resolve func(*Lock, bool) (TxnStatus, error)
440440
resolve = func(l *Lock, forceSyncCommit bool) (TxnStatus, error) {
441441
status, err := lr.getTxnStatusFromLock(bo, l, callerStartTS, forceSyncCommit, detail)
442-
if err != nil {
442+
443+
if _, ok := errors.Cause(err).(primaryMismatch); ok {
444+
if l.LockType != kvrpcpb.Op_PessimisticLock {
445+
logutil.BgLogger().Info("unexpected primaryMismatch error occurred on a non-pessimistic lock", zap.Stringer("lock", l), zap.Error(err))
446+
return TxnStatus{}, err
447+
}
448+
// Pessimistic rollback the pessimistic lock as it points to an invalid primary.
449+
status, err = TxnStatus{}, nil
450+
} else if err != nil {
443451
return TxnStatus{}, err
444452
}
445453
if status.ttl != 0 {
@@ -672,6 +680,14 @@ func (e txnNotFoundErr) Error() string {
672680
return e.TxnNotFound.String()
673681
}
674682

683+
type primaryMismatch struct {
684+
currentLock *kvrpcpb.LockInfo
685+
}
686+
687+
func (e primaryMismatch) Error() string {
688+
return "primary mismatch, current lock: " + e.currentLock.String()
689+
}
690+
675691
// getTxnStatus sends the CheckTxnStatus request to the TiKV server.
676692
// When rollbackIfNotExist is false, the caller should be careful with the txnNotFoundErr error.
677693
func (lr *LockResolver) getTxnStatus(bo *retry.Backoffer, txnID uint64, primary []byte,
@@ -701,6 +717,7 @@ func (lr *LockResolver) getTxnStatus(bo *retry.Backoffer, txnID uint64, primary
701717
RollbackIfNotExist: rollbackIfNotExist,
702718
ForceSyncCommit: forceSyncCommit,
703719
ResolvingPessimisticLock: resolvingPessimisticLock,
720+
VerifyIsPrimary: true,
704721
}, kvrpcpb.Context{
705722
RequestSource: util.RequestSourceFromCtx(bo.GetCtx()),
706723
})
@@ -735,6 +752,12 @@ func (lr *LockResolver) getTxnStatus(bo *retry.Backoffer, txnID uint64, primary
735752
return status, txnNotFoundErr{txnNotFound}
736753
}
737754

755+
if p := keyErr.GetPrimaryMismatch(); p != nil && resolvingPessimisticLock {
756+
err = primaryMismatch{currentLock: p.GetLockInfo()}
757+
logutil.BgLogger().Info("getTxnStatus was called on secondary lock", zap.Error(err))
758+
return status, err
759+
}
760+
738761
err = errors.Errorf("unexpected err: %s, tid: %v", keyErr, txnID)
739762
logutil.BgLogger().Error("getTxnStatus error", zap.Error(err))
740763
return status, err

0 commit comments

Comments
 (0)