Skip to content
This repository was archived by the owner on Mar 28, 2020. It is now read-only.

Commit 7ba7168

Browse files
[SimplifyLibCalls] Add dereferenceable bytes from known callsites
Summary:
int mm(char *a, char *b) {
  return memcmp(a,b,16);
}

Currently:
define dso_local i32 @mm(i8* nocapture readonly %a, i8* nocapture readonly %b) local_unnamed_addr #1 {
entry:
  %call = tail call i32 @memcmp(i8* %a, i8* %b, i64 16)
  ret i32 %call
}

After patch:
define dso_local i32 @mm(i8* nocapture readonly %a, i8* nocapture readonly %b) local_unnamed_addr #1 {
entry:
  %call = tail call i32 @memcmp(i8* dereferenceable(16) %a, i8* dereferenceable(16) %b, i64 16)
  ret i32 %call
}

Reviewers: jdoerfert, efriedma

Reviewed By: jdoerfert

Subscribers: javed.absar, spatel, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D66079

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@368657 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 22809c3 commit 7ba7168

22 files changed

+870
-361
lines changed

include/llvm/Transforms/Utils/SimplifyLibCalls.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -167,9 +167,9 @@ class LibCallSimplifier {
167167
Value *optimizeMemCmp(CallInst *CI, IRBuilder<> &B);
168168
Value *optimizeBCmp(CallInst *CI, IRBuilder<> &B);
169169
Value *optimizeMemCmpBCmpCommon(CallInst *CI, IRBuilder<> &B);
170-
Value *optimizeMemCpy(CallInst *CI, IRBuilder<> &B);
171-
Value *optimizeMemMove(CallInst *CI, IRBuilder<> &B);
172-
Value *optimizeMemSet(CallInst *CI, IRBuilder<> &B);
170+
Value *optimizeMemCpy(CallInst *CI, IRBuilder<> &B, bool isIntrinsic = false);
171+
Value *optimizeMemMove(CallInst *CI, IRBuilder<> &B, bool isIntrinsic = false);
172+
Value *optimizeMemSet(CallInst *CI, IRBuilder<> &B, bool isIntrinsic = false);
173173
Value *optimizeRealloc(CallInst *CI, IRBuilder<> &B);
174174
Value *optimizeWcslen(CallInst *CI, IRBuilder<> &B);
175175
// Wrapper for all String/Memory Library Call Optimizations

lib/Transforms/Utils/SimplifyLibCalls.cpp

Lines changed: 58 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,20 @@ static bool canTransformToMemCmp(CallInst *CI, Value *Str, uint64_t Len,
186186
return true;
187187
}
188188

189+
static void annotateDereferenceableBytes(CallInst *CI,
190+
ArrayRef<unsigned> ArgNos,
191+
uint64_t DerefBytes) {
192+
for (unsigned ArgNo : ArgNos) {
193+
if (CI->getDereferenceableBytes(ArgNo + AttributeList::FirstArgIndex) <
194+
DerefBytes) {
195+
CI->removeParamAttr(ArgNo, Attribute::Dereferenceable);
196+
CI->removeParamAttr(ArgNo, Attribute::DereferenceableOrNull);
197+
CI->addParamAttr(ArgNo, Attribute::getWithDereferenceableBytes(
198+
CI->getContext(), DerefBytes));
199+
}
200+
}
201+
}
202+
189203
//===----------------------------------------------------------------------===//
190204
// String and Memory Library Call Optimizations
191205
//===----------------------------------------------------------------------===//
@@ -765,9 +779,11 @@ Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilder<> &B) {
765779
ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getArgOperand(2));
766780

767781
// memchr(x, y, 0) -> null
768-
if (LenC && LenC->isZero())
769-
return Constant::getNullValue(CI->getType());
770-
782+
if (LenC) {
783+
if (LenC->isZero())
784+
return Constant::getNullValue(CI->getType());
785+
annotateDereferenceableBytes(CI, {0}, LenC->getZExtValue());
786+
}
771787
// From now on we need at least constant length and string.
772788
StringRef Str;
773789
if (!LenC || !getConstantStringInfo(SrcStr, Str, 0, /*TrimAtNul=*/false))
@@ -926,10 +942,12 @@ Value *LibCallSimplifier::optimizeMemCmpBCmpCommon(CallInst *CI,
926942
return Constant::getNullValue(CI->getType());
927943

928944
// Handle constant lengths.
929-
if (ConstantInt *LenC = dyn_cast<ConstantInt>(Size))
945+
if (ConstantInt *LenC = dyn_cast<ConstantInt>(Size)) {
930946
if (Value *Res = optimizeMemCmpConstantSize(CI, LHS, RHS,
931947
LenC->getZExtValue(), B, DL))
932948
return Res;
949+
annotateDereferenceableBytes(CI, {0, 1}, LenC->getZExtValue());
950+
}
933951

934952
return nullptr;
935953
}
@@ -955,18 +973,31 @@ Value *LibCallSimplifier::optimizeBCmp(CallInst *CI, IRBuilder<> &B) {
955973
return optimizeMemCmpBCmpCommon(CI, B);
956974
}
957975

958-
Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilder<> &B) {
976+
Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilder<> &B,
977+
bool isIntrinsic) {
978+
Value *Size = CI->getArgOperand(2);
979+
if (ConstantInt *LenC = dyn_cast<ConstantInt>(Size))
980+
annotateDereferenceableBytes(CI, {0, 1}, LenC->getZExtValue());
981+
982+
if (isIntrinsic)
983+
return nullptr;
984+
959985
// memcpy(x, y, n) -> llvm.memcpy(align 1 x, align 1 y, n)
960-
B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1,
961-
CI->getArgOperand(2));
986+
B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1, Size);
962987
return CI->getArgOperand(0);
963988
}
964989

965-
Value *LibCallSimplifier::optimizeMemMove(CallInst *CI, IRBuilder<> &B) {
990+
Value *LibCallSimplifier::optimizeMemMove(CallInst *CI, IRBuilder<> &B, bool isIntrinsic) {
991+
Value *Size = CI->getArgOperand(2);
992+
if (ConstantInt *LenC = dyn_cast<ConstantInt>(Size))
993+
annotateDereferenceableBytes(CI, {0, 1}, LenC->getZExtValue());
994+
995+
if (isIntrinsic)
996+
return nullptr;
997+
966998
// memmove(x, y, n) -> llvm.memmove(align 1 x, align 1 y, n)
967-
B.CreateMemMove(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1,
968-
CI->getArgOperand(2));
969-
return CI->getArgOperand(0);
999+
B.CreateMemMove( CI->getArgOperand(0), 1, CI->getArgOperand(1), 1, Size);
1000+
return CI->getArgOperand(0);
9701001
}
9711002

9721003
/// Fold memset[_chk](malloc(n), 0, n) --> calloc(1, n).
@@ -1015,13 +1046,21 @@ Value *LibCallSimplifier::foldMallocMemset(CallInst *Memset, IRBuilder<> &B) {
10151046
return Calloc;
10161047
}
10171048

1018-
Value *LibCallSimplifier::optimizeMemSet(CallInst *CI, IRBuilder<> &B) {
1049+
Value *LibCallSimplifier::optimizeMemSet(CallInst *CI, IRBuilder<> &B,
1050+
bool isIntrinsic) {
1051+
Value *Size = CI->getArgOperand(2);
1052+
if (ConstantInt *LenC = dyn_cast<ConstantInt>(Size))
1053+
annotateDereferenceableBytes(CI, {0}, LenC->getZExtValue());
1054+
1055+
if (isIntrinsic)
1056+
return nullptr;
1057+
10191058
if (auto *Calloc = foldMallocMemset(CI, B))
10201059
return Calloc;
10211060

10221061
// memset(p, v, n) -> llvm.memset(align 1 p, v, n)
10231062
Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);
1024-
B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1);
1063+
B.CreateMemSet(CI->getArgOperand(0), Val, Size, 1);
10251064
return CI->getArgOperand(0);
10261065
}
10271066

@@ -2710,6 +2749,12 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
27102749
case Intrinsic::sqrt:
27112750
return optimizeSqrt(CI, Builder);
27122751
// TODO: Use foldMallocMemset() with memset intrinsic.
2752+
case Intrinsic::memset:
2753+
return optimizeMemSet(CI, Builder, true);
2754+
case Intrinsic::memcpy:
2755+
return optimizeMemCpy(CI, Builder, true);
2756+
case Intrinsic::memmove:
2757+
return optimizeMemMove(CI, Builder, true);
27132758
default:
27142759
return nullptr;
27152760
}

test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ target datalayout = "e-p:64:64:64"
66
; it has a TBAA tag which declares that it is unrelated.
77

88
; CHECK: @foo
9-
; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %p, i8* align 1 %q, i64 16, i1 false), !tbaa !0
9+
; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 dereferenceable(16) %p, i8* align 1 dereferenceable(16) %q, i64 16, i1 false), !tbaa !0
1010
; CHECK-NEXT: store i8 2, i8* %s, align 1, !tbaa [[TAGA:!.*]]
1111
; CHECK-NEXT: ret void
1212
define void @foo(i8* nocapture %p, i8* nocapture %q, i8* nocapture %s) nounwind {

test/Transforms/InstCombine/ARM/strcmp.ll

Lines changed: 39 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
12
; Test that the strcmp library call simplifier works correctly.
23
; RUN: opt < %s -instcombine -S | FileCheck %s
34

@@ -13,10 +14,11 @@ declare i32 @strcmp(i8*, i8*)
1314
; strcmp("", x) -> -*x
1415
define arm_aapcscc i32 @test1(i8* %str2) {
1516
; CHECK-LABEL: @test1(
16-
; CHECK: %strcmpload = load i8, i8* %str
17-
; CHECK: %1 = zext i8 %strcmpload to i32
18-
; CHECK: %2 = sub nsw i32 0, %1
19-
; CHECK: ret i32 %2
17+
; CHECK-NEXT: [[STRCMPLOAD:%.*]] = load i8, i8* [[STR2:%.*]], align 1
18+
; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[STRCMPLOAD]] to i32
19+
; CHECK-NEXT: [[TMP2:%.*]] = sub nsw i32 0, [[TMP1]]
20+
; CHECK-NEXT: ret i32 [[TMP2]]
21+
;
2022

2123
%str1 = getelementptr inbounds [1 x i8], [1 x i8]* @null, i32 0, i32 0
2224
%temp1 = call arm_apcscc i32 @strcmp(i8* %str1, i8* %str2)
@@ -27,9 +29,10 @@ define arm_aapcscc i32 @test1(i8* %str2) {
2729
; strcmp(x, "") -> *x
2830
define arm_aapcscc i32 @test2(i8* %str1) {
2931
; CHECK-LABEL: @test2(
30-
; CHECK: %strcmpload = load i8, i8* %str
31-
; CHECK: %1 = zext i8 %strcmpload to i32
32-
; CHECK: ret i32 %1
32+
; CHECK-NEXT: [[STRCMPLOAD:%.*]] = load i8, i8* [[STR1:%.*]], align 1
33+
; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[STRCMPLOAD]] to i32
34+
; CHECK-NEXT: ret i32 [[TMP1]]
35+
;
3336

3437
%str2 = getelementptr inbounds [1 x i8], [1 x i8]* @null, i32 0, i32 0
3538
%temp1 = call arm_aapcscc i32 @strcmp(i8* %str1, i8* %str2)
@@ -39,7 +42,8 @@ define arm_aapcscc i32 @test2(i8* %str1) {
3942
; strcmp(x, y) -> cnst
4043
define arm_aapcscc i32 @test3() {
4144
; CHECK-LABEL: @test3(
42-
; CHECK: ret i32 -1
45+
; CHECK-NEXT: ret i32 -1
46+
;
4347

4448
%str1 = getelementptr inbounds [5 x i8], [5 x i8]* @hell, i32 0, i32 0
4549
%str2 = getelementptr inbounds [6 x i8], [6 x i8]* @hello, i32 0, i32 0
@@ -49,7 +53,8 @@ define arm_aapcscc i32 @test3() {
4953

5054
define arm_aapcscc i32 @test4() {
5155
; CHECK-LABEL: @test4(
52-
; CHECK: ret i32 1
56+
; CHECK-NEXT: ret i32 1
57+
;
5358

5459
%str1 = getelementptr inbounds [5 x i8], [5 x i8]* @hell, i32 0, i32 0
5560
%str2 = getelementptr inbounds [1 x i8], [1 x i8]* @null, i32 0, i32 0
@@ -61,8 +66,10 @@ define arm_aapcscc i32 @test4() {
6166
; (This transform is rather difficult to trigger in a useful manner)
6267
define arm_aapcscc i32 @test5(i1 %b) {
6368
; CHECK-LABEL: @test5(
64-
; CHECK: %memcmp = call i32 @memcmp(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @hello, i32 0, i32 0), i8* %str2, i32 5)
65-
; CHECK: ret i32 %memcmp
69+
; CHECK-NEXT: [[STR2:%.*]] = select i1 [[B:%.*]], i8* getelementptr inbounds ([5 x i8], [5 x i8]* @hell, i32 0, i32 0), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @bell, i32 0, i32 0)
70+
; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* dereferenceable(5) getelementptr inbounds ([6 x i8], [6 x i8]* @hello, i32 0, i32 0), i8* dereferenceable(5) [[STR2]], i32 5)
71+
; CHECK-NEXT: ret i32 [[MEMCMP]]
72+
;
6673

6774
%str1 = getelementptr inbounds [6 x i8], [6 x i8]* @hello, i32 0, i32 0
6875
%temp1 = getelementptr inbounds [5 x i8], [5 x i8]* @hell, i32 0, i32 0
@@ -75,7 +82,8 @@ define arm_aapcscc i32 @test5(i1 %b) {
7582
; strcmp(x,x) -> 0
7683
define arm_aapcscc i32 @test6(i8* %str) {
7784
; CHECK-LABEL: @test6(
78-
; CHECK: ret i32 0
85+
; CHECK-NEXT: ret i32 0
86+
;
7987

8088
%temp1 = call arm_aapcscc i32 @strcmp(i8* %str, i8* %str)
8189
ret i32 %temp1
@@ -84,10 +92,11 @@ define arm_aapcscc i32 @test6(i8* %str) {
8492
; strcmp("", x) -> -*x
8593
define arm_aapcs_vfpcc i32 @test1_vfp(i8* %str2) {
8694
; CHECK-LABEL: @test1_vfp(
87-
; CHECK: %strcmpload = load i8, i8* %str
88-
; CHECK: %1 = zext i8 %strcmpload to i32
89-
; CHECK: %2 = sub nsw i32 0, %1
90-
; CHECK: ret i32 %2
95+
; CHECK-NEXT: [[STRCMPLOAD:%.*]] = load i8, i8* [[STR2:%.*]], align 1
96+
; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[STRCMPLOAD]] to i32
97+
; CHECK-NEXT: [[TMP2:%.*]] = sub nsw i32 0, [[TMP1]]
98+
; CHECK-NEXT: ret i32 [[TMP2]]
99+
;
91100

92101
%str1 = getelementptr inbounds [1 x i8], [1 x i8]* @null, i32 0, i32 0
93102
%temp1 = call arm_aapcs_vfpcc i32 @strcmp(i8* %str1, i8* %str2)
@@ -98,9 +107,10 @@ define arm_aapcs_vfpcc i32 @test1_vfp(i8* %str2) {
98107
; strcmp(x, "") -> *x
99108
define arm_aapcs_vfpcc i32 @test2_vfp(i8* %str1) {
100109
; CHECK-LABEL: @test2_vfp(
101-
; CHECK: %strcmpload = load i8, i8* %str
102-
; CHECK: %1 = zext i8 %strcmpload to i32
103-
; CHECK: ret i32 %1
110+
; CHECK-NEXT: [[STRCMPLOAD:%.*]] = load i8, i8* [[STR1:%.*]], align 1
111+
; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[STRCMPLOAD]] to i32
112+
; CHECK-NEXT: ret i32 [[TMP1]]
113+
;
104114

105115
%str2 = getelementptr inbounds [1 x i8], [1 x i8]* @null, i32 0, i32 0
106116
%temp1 = call arm_aapcs_vfpcc i32 @strcmp(i8* %str1, i8* %str2)
@@ -110,7 +120,8 @@ define arm_aapcs_vfpcc i32 @test2_vfp(i8* %str1) {
110120
; strcmp(x, y) -> cnst
111121
define arm_aapcs_vfpcc i32 @test3_vfp() {
112122
; CHECK-LABEL: @test3_vfp(
113-
; CHECK: ret i32 -1
123+
; CHECK-NEXT: ret i32 -1
124+
;
114125

115126
%str1 = getelementptr inbounds [5 x i8], [5 x i8]* @hell, i32 0, i32 0
116127
%str2 = getelementptr inbounds [6 x i8], [6 x i8]* @hello, i32 0, i32 0
@@ -120,7 +131,8 @@ define arm_aapcs_vfpcc i32 @test3_vfp() {
120131

121132
define arm_aapcs_vfpcc i32 @test4_vfp() {
122133
; CHECK-LABEL: @test4_vfp(
123-
; CHECK: ret i32 1
134+
; CHECK-NEXT: ret i32 1
135+
;
124136

125137
%str1 = getelementptr inbounds [5 x i8], [5 x i8]* @hell, i32 0, i32 0
126138
%str2 = getelementptr inbounds [1 x i8], [1 x i8]* @null, i32 0, i32 0
@@ -132,8 +144,10 @@ define arm_aapcs_vfpcc i32 @test4_vfp() {
132144
; (This transform is rather difficult to trigger in a useful manner)
133145
define arm_aapcs_vfpcc i32 @test5_vfp(i1 %b) {
134146
; CHECK-LABEL: @test5_vfp(
135-
; CHECK: %memcmp = call i32 @memcmp(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @hello, i32 0, i32 0), i8* %str2, i32 5)
136-
; CHECK: ret i32 %memcmp
147+
; CHECK-NEXT: [[STR2:%.*]] = select i1 [[B:%.*]], i8* getelementptr inbounds ([5 x i8], [5 x i8]* @hell, i32 0, i32 0), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @bell, i32 0, i32 0)
148+
; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* dereferenceable(5) getelementptr inbounds ([6 x i8], [6 x i8]* @hello, i32 0, i32 0), i8* dereferenceable(5) [[STR2]], i32 5)
149+
; CHECK-NEXT: ret i32 [[MEMCMP]]
150+
;
137151

138152
%str1 = getelementptr inbounds [6 x i8], [6 x i8]* @hello, i32 0, i32 0
139153
%temp1 = getelementptr inbounds [5 x i8], [5 x i8]* @hell, i32 0, i32 0
@@ -146,7 +160,8 @@ define arm_aapcs_vfpcc i32 @test5_vfp(i1 %b) {
146160
; strcmp(x,x) -> 0
147161
define arm_aapcs_vfpcc i32 @test6_vfp(i8* %str) {
148162
; CHECK-LABEL: @test6_vfp(
149-
; CHECK: ret i32 0
163+
; CHECK-NEXT: ret i32 0
164+
;
150165

151166
%temp1 = call arm_aapcs_vfpcc i32 @strcmp(i8* %str, i8* %str)
152167
ret i32 %temp1

0 commit comments

Comments
 (0)