diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index b9efd9aaa28c5..9bf87f2370531 100644 --- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -1161,6 +1161,14 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M, MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(M))) return false; + // No need to create `memcpy(a <- a)`. + if (BAA.isMustAlias(M->getDest(), MDep->getSource())) { + // Remove the instruction we're replacing. + eraseInstruction(M); + ++NumMemCpyInstr; + return true; + } + // If the dest of the second might alias the source of the first, then the // source and dest might overlap. In addition, if the source of the first // points to constant memory, they won't overlap by definition. Otherwise, we diff --git a/llvm/test/Transforms/MemCpyOpt/memcpy.ll b/llvm/test/Transforms/MemCpyOpt/memcpy.ll index 48698f25174e7..1d55c733da831 100644 --- a/llvm/test/Transforms/MemCpyOpt/memcpy.ll +++ b/llvm/test/Transforms/MemCpyOpt/memcpy.ll @@ -139,6 +139,37 @@ define void @test6_memcpy(ptr %src, ptr %dest) nounwind { ret void } +; When forwarding to memcpy(arg+1, arg+1), we don't need to create this memcpy. +define void @test6_memcpy_forward_back(ptr %arg) nounwind { +; CHECK-LABEL: @test6_memcpy_forward_back( +; CHECK-NEXT: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[ARG:%.*]], i64 1 +; CHECK-NEXT: ret void +; + %tmp = alloca [16 x i8], align 1 + %src = getelementptr inbounds i8, ptr %arg, i64 1 + %dest = getelementptr inbounds i8, ptr %arg, i64 1 + call void @llvm.memcpy.inline.p0.p0.i32(ptr align 1 %tmp, ptr align 1 %src, i32 16, i1 false) + call void @llvm.memcpy.inline.p0.p0.i32(ptr align 1 %dest, ptr align 1 %tmp, i32 16, i1 false) + ret void +} + +; We have to retain this `memcpy(arg+2, arg+1)` forwarding. +define void @test6_memcpy_forward_not_back(ptr %arg) nounwind { +; CHECK-LABEL: @test6_memcpy_forward_not_back( +; CHECK-NEXT: [[TMP:%.*]] = alloca [16 x i8], align 1 +; CHECK-NEXT: [[SRC:%.*]] = getelementptr inbounds i8, ptr [[ARG:%.*]], i64 1 +; CHECK-NEXT: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 2 +; CHECK-NEXT: call void @llvm.memcpy.inline.p0.p0.i32(ptr align 1 [[TMP]], ptr align 1 [[SRC]], i32 16, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.inline.p0.p0.i32(ptr align 1 [[DEST]], ptr align 1 [[TMP]], i32 16, i1 false) +; CHECK-NEXT: ret void +; + %tmp = alloca [16 x i8], align 1 + %src = getelementptr inbounds i8, ptr %arg, i64 1 + %dest = getelementptr inbounds i8, ptr %arg, i64 2 + call void @llvm.memcpy.inline.p0.p0.i32(ptr align 1 %tmp, ptr align 1 %src, i32 16, i1 false) + call void @llvm.memcpy.inline.p0.p0.i32(ptr align 1 %dest, ptr align 1 %tmp, i32 16, i1 false) + ret void +} @x = external global %0