From 7d1c3d4881647e53d0e317e21f3b31349120062f Mon Sep 17 00:00:00 2001
From: Luqman Aden
Date: Wed, 22 Mar 2017 19:16:39 +0000
Subject: [PATCH 1/5] Preserve nonnull metadata on Loads through SROA &
 mem2reg.

Summary:
https://llvm.org/bugs/show_bug.cgi?id=31142 : SROA was dropping the nonnull
metadata on loads from allocas that got optimized out. This patch simply
preserves nonnull metadata on loads through SROA and mem2reg.

Reviewers: chandlerc, efriedma

Reviewed By: efriedma

Subscribers: hfinkel, spatel, efriedma, arielb1, davide, llvm-commits

Differential Revision: https://reviews.llvm.org/D27114

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@298540 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Scalar/SROA.cpp                |  4 +
 .../Utils/PromoteMemoryToRegister.cpp         | 57 +++++++++---
 .../Mem2Reg/preserve-nonnull-load-metadata.ll | 89 +++++++++++++++++++
 test/Transforms/SROA/preserve-nonnull.ll      | 26 ++++++
 4 files changed, 166 insertions(+), 10 deletions(-)
 create mode 100644 test/Transforms/Mem2Reg/preserve-nonnull-load-metadata.ll
 create mode 100644 test/Transforms/SROA/preserve-nonnull.ll

diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index bfcb15530ef5..3ed2a51febaf 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -2387,6 +2387,10 @@ class llvm::sroa::AllocaSliceRewriter
                               LI.isVolatile(), LI.getName());
       if (LI.isVolatile())
         NewLI->setAtomic(LI.getOrdering(), LI.getSynchScope());
+
+      // Try to preserve nonnull metadata
+      if (TargetTy->isPointerTy())
+        NewLI->copyMetadata(LI, LLVMContext::MD_nonnull);
       V = NewLI;
 
       // If this is an integer load past the end of the slice (which means the
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index 35faa6f65efd..0f891cdf3d4c 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -15,7 +15,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Transforms/Utils/PromoteMemToReg.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/STLExtras.h"
@@ -23,6 +22,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/IteratedDominanceFrontier.h"
 #include "llvm/Analysis/ValueTracking.h"
@@ -38,6 +38,7 @@
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
 #include <algorithm>
 using namespace llvm;
@@ -301,6 +302,18 @@ struct PromoteMem2Reg {
 
 } // end of anonymous namespace
 
+/// Given a LoadInst LI, this adds assume(LI != null) after it.
+static void addAssumeNonNull(AssumptionCache *AC, LoadInst *LI) {
+  Function *AssumeIntrinsic =
+      Intrinsic::getDeclaration(LI->getModule(), Intrinsic::assume);
+  ICmpInst *LoadNotNull = new ICmpInst(ICmpInst::ICMP_NE, LI,
+                                       Constant::getNullValue(LI->getType()));
+  LoadNotNull->insertAfter(LI);
+  CallInst *CI = CallInst::Create(AssumeIntrinsic, {LoadNotNull});
+  CI->insertAfter(LoadNotNull);
+  AC->registerAssumption(CI);
+}
+
 static void removeLifetimeIntrinsicUsers(AllocaInst *AI) {
   // Knowing that this alloca is promotable, we know that it's safe to kill all
   // instructions except for load and store.
@@ -334,9 +347,9 @@ static void removeLifetimeIntrinsicUsers(AllocaInst *AI) {
 /// and thus must be phi-ed with undef. We fall back to the standard alloca
 /// promotion algorithm in that case.
 static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
-                                     LargeBlockInfo &LBI,
-                                     DominatorTree &DT,
-                                     AliasSetTracker *AST) {
+                                     LargeBlockInfo &LBI, DominatorTree &DT,
+                                     AliasSetTracker *AST,
+                                     AssumptionCache *AC) {
   StoreInst *OnlyStore = Info.OnlyStore;
   bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0));
   BasicBlock *StoreBB = OnlyStore->getParent();
@@ -387,6 +400,14 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
       // code.
       if (ReplVal == LI)
         ReplVal = UndefValue::get(LI->getType());
+
+      // If the load was marked as nonnull we don't want to lose
+      // that information when we erase this Load. So we preserve
+      // it with an assume.
+      if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
+          !llvm::isKnownNonNullAt(ReplVal, LI, &DT))
+        addAssumeNonNull(AC, LI);
+
       LI->replaceAllUsesWith(ReplVal);
       if (AST && LI->getType()->isPointerTy())
         AST->deleteValue(LI);
@@ -435,7 +456,9 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
 /// }
 static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
                                      LargeBlockInfo &LBI,
-                                     AliasSetTracker *AST) {
+                                     AliasSetTracker *AST,
+                                     DominatorTree &DT,
+                                     AssumptionCache *AC) {
   // The trickiest case to handle is when we have large blocks. Because of this,
   // this code is optimized assuming that large blocks happen. This does not
   // significantly pessimize the small block case. This uses LargeBlockInfo to
@@ -476,10 +499,17 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
         // There is no store before this load, bail out (load may be affected
         // by the following stores - see main comment).
         return false;
-      }
-      else
+      } else {
         // Otherwise, there was a store before this load, the load takes its value.
-        LI->replaceAllUsesWith(std::prev(I)->second->getOperand(0));
+        // Note, if the load was marked as nonnull we don't want to lose that
+        // information when we erase it. So we preserve it with an assume.
+        Value *ReplVal = std::prev(I)->second->getOperand(0);
+        if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
+            !llvm::isKnownNonNullAt(ReplVal, LI, &DT))
+          addAssumeNonNull(AC, LI);
+
+        LI->replaceAllUsesWith(ReplVal);
+      }
 
       if (AST && LI->getType()->isPointerTy())
         AST->deleteValue(LI);
@@ -553,7 +583,7 @@ void PromoteMem2Reg::run() {
     // If there is only a single store to this value, replace any loads of
     // it that are directly dominated by the definition with the value stored.
     if (Info.DefiningBlocks.size() == 1) {
-      if (rewriteSingleStoreAlloca(AI, Info, LBI, DT, AST)) {
+      if (rewriteSingleStoreAlloca(AI, Info, LBI, DT, AST, AC)) {
         // The alloca has been processed, move on.
         RemoveFromAllocasList(AllocaNum);
         ++NumSingleStore;
@@ -564,7 +594,7 @@ void PromoteMem2Reg::run() {
     // If the alloca is only read and written in one basic block, just perform a
     // linear sweep over the block to eliminate it.
     if (Info.OnlyUsedInOneBlock &&
-        promoteSingleBlockAlloca(AI, Info, LBI, AST)) {
+        promoteSingleBlockAlloca(AI, Info, LBI, AST, DT, AC)) {
       // The alloca has been processed, move on.
       RemoveFromAllocasList(AllocaNum);
       continue;
@@ -940,6 +970,13 @@ void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred,
 
       Value *V = IncomingVals[AI->second];
 
+      // If the load was marked as nonnull we don't want to lose
+      // that information when we erase this Load. So we preserve
+      // it with an assume.
+      if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
+          !llvm::isKnownNonNullAt(V, LI, &DT))
+        addAssumeNonNull(AC, LI);
+
       // Anything using the load now uses the current value.
       LI->replaceAllUsesWith(V);
       if (AST && LI->getType()->isPointerTy())
diff --git a/test/Transforms/Mem2Reg/preserve-nonnull-load-metadata.ll b/test/Transforms/Mem2Reg/preserve-nonnull-load-metadata.ll
new file mode 100644
index 000000000000..33a5b124c555
--- /dev/null
+++ b/test/Transforms/Mem2Reg/preserve-nonnull-load-metadata.ll
@@ -0,0 +1,89 @@
+; RUN: opt < %s -mem2reg -S | FileCheck %s
+
+; This tests that mem2reg preserves the !nonnull metadata on loads
+; from allocas that get optimized out.
+
+; Check the case where the alloca in question has a single store.
+define float* @single_store(float** %arg) {
+; CHECK-LABEL: define float* @single_store
+; CHECK: %arg.load = load float*, float** %arg, align 8
+; CHECK: [[ASSUME:%(.*)]] = icmp ne float* %arg.load, null
+; CHECK: call void @llvm.assume(i1 {{.*}}[[ASSUME]])
+; CHECK: ret float* %arg.load
+entry:
+  %buf = alloca float*
+  %arg.load = load float*, float** %arg, align 8
+  store float* %arg.load, float** %buf, align 8
+  %buf.load = load float*, float** %buf, !nonnull !0
+  ret float* %buf.load
+}
+
+; Check the case where the alloca in question has more than one
+; store but still within one basic block.
+define float* @single_block(float** %arg) {
+; CHECK-LABEL: define float* @single_block
+; CHECK: %arg.load = load float*, float** %arg, align 8
+; CHECK: [[ASSUME:%(.*)]] = icmp ne float* %arg.load, null
+; CHECK: call void @llvm.assume(i1 {{.*}}[[ASSUME]])
+; CHECK: ret float* %arg.load
+entry:
+  %buf = alloca float*
+  %arg.load = load float*, float** %arg, align 8
+  store float* null, float** %buf, align 8
+  store float* %arg.load, float** %buf, align 8
+  %buf.load = load float*, float** %buf, !nonnull !0
+  ret float* %buf.load
+}
+
+; Check the case where the alloca in question has more than one
+; store and also reads and writes in multiple blocks.
+define float* @multi_block(float** %arg) {
+; CHECK-LABEL: define float* @multi_block
+; CHECK-LABEL: entry:
+; CHECK: %arg.load = load float*, float** %arg, align 8
+; CHECK: br label %next
+; CHECK-LABEL: next:
+; CHECK: [[ASSUME:%(.*)]] = icmp ne float* %arg.load, null
+; CHECK: call void @llvm.assume(i1 {{.*}}[[ASSUME]])
+; CHECK: ret float* %arg.load
+entry:
+  %buf = alloca float*
+  %arg.load = load float*, float** %arg, align 8
+  store float* null, float** %buf, align 8
+  br label %next
+next:
+  store float* %arg.load, float** %buf, align 8
+  %buf.load = load float*, float** %buf, !nonnull !0
+  ret float* %buf.load
+}
+
+; Check that we don't add an assume if it's not
+; necessary, i.e. the value is already implied to be nonnull.
+define float* @no_assume(float** %arg) {
+; CHECK-LABEL: define float* @no_assume
+; CHECK-LABEL: entry:
+; CHECK: %arg.load = load float*, float** %arg, align 8
+; CHECK: %cn = icmp ne float* %arg.load, null
+; CHECK: br i1 %cn, label %next, label %fin
+; CHECK-LABEL: next:
+; CHECK-NOT: call void @llvm.assume
+; CHECK: ret float* %arg.load
+; CHECK-LABEL: fin:
+; CHECK: ret float* null
+entry:
+  %buf = alloca float*
+  %arg.load = load float*, float** %arg, align 8
+  %cn = icmp ne float* %arg.load, null
+  br i1 %cn, label %next, label %fin
+next:
+; At this point the above nonnull check ensures that
+; the value %arg.load is nonnull in this block and thus
+; we need not add the assume.
+  store float* %arg.load, float** %buf, align 8
+  %buf.load = load float*, float** %buf, !nonnull !0
+  ret float* %buf.load
+fin:
+  ret float* null
+}
+
+!0 = !{}
diff --git a/test/Transforms/SROA/preserve-nonnull.ll b/test/Transforms/SROA/preserve-nonnull.ll
new file mode 100644
index 000000000000..fc5ce6a445fa
--- /dev/null
+++ b/test/Transforms/SROA/preserve-nonnull.ll
@@ -0,0 +1,26 @@
+; RUN: opt < %s -sroa -S | FileCheck %s
+;
+; Make sure that SROA doesn't lose nonnull metadata
+; on loads from allocas that get optimized out.
+
+; CHECK-LABEL: define float* @yummy_nonnull
+; CHECK: [[RETURN:%(.*)]] = load float*, float** %arg, align 8
+; CHECK: [[ASSUME:%(.*)]] = icmp ne float* {{.*}}[[RETURN]], null
+; CHECK: call void @llvm.assume(i1 {{.*}}[[ASSUME]])
+; CHECK: ret float* {{.*}}[[RETURN]]
+
+define float* @yummy_nonnull(float** %arg) {
+entry-block:
+  %buf = alloca float*
+
+  %_arg_i8 = bitcast float** %arg to i8*
+  %_buf_i8 = bitcast float** %buf to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %_buf_i8, i8* %_arg_i8, i64 8, i32 8, i1 false)
+
+  %ret = load float*, float** %buf, align 8, !nonnull !0
+  ret float* %ret
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)
+
+!0 = !{}

From 98de0be5cd00760c079c0dde7744b56b300df92f Mon Sep 17 00:00:00 2001
From: Chandler Carruth
Date: Mon, 26 Jun 2017 03:31:31 +0000
Subject: [PATCH 2/5] [InstCombine] Factor the logic for propagating !nonnull
 and !range metadata out of InstCombine and into helpers.

NFC, this just exposes the logic used by InstCombine when propagating
metadata from one load instruction to another. The plan is to use this
in SROA to address PR32902.

If anyone has better ideas about how to factor this or name variables,
I'm all ears, but this seemed like a pretty good start and lets us make
progress on the PR.

This is based on a patch by Ariel Ben-Yehuda (D34285).

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@306267 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/Transforms/Utils/Local.h         | 13 +++++
 .../InstCombineLoadStoreAlloca.cpp            | 28 +---------
 lib/Transforms/Utils/Local.cpp                | 54 +++++++++++++++++--
 3 files changed, 64 insertions(+), 31 deletions(-)

diff --git a/include/llvm/Transforms/Utils/Local.h b/include/llvm/Transforms/Utils/Local.h
index 490a765c3fab..2d0f25b19e7c 100644
--- a/include/llvm/Transforms/Utils/Local.h
+++ b/include/llvm/Transforms/Utils/Local.h
@@ -366,6 +366,19 @@ unsigned replaceDominatedUsesWith(Value *From, Value *To, DominatorTree &DT,
 /// during lowering by the GC infrastructure.
 bool callsGCLeafFunction(ImmutableCallSite CS);
 
+/// Copy a nonnull metadata node to a new load instruction.
+///
+/// This handles mapping it to range metadata if the new load is an integer
+/// load instead of a pointer load.
+void copyNonnullMetadata(const LoadInst &OldLI, MDNode *N, LoadInst &NewLI);
+
+/// Copy a range metadata node to a new load instruction.
+///
+/// This handles mapping it to nonnull metadata if the new load is a pointer
+/// load instead of an integer load and the range doesn't cover null.
+void copyRangeMetadata(const DataLayout &DL, const LoadInst &OldLI, MDNode *N,
+                       LoadInst &NewLI);
+
 //===----------------------------------------------------------------------===//
 //  Intrinsic pattern matching
 //
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 5789296b1ec9..1b126586c9a4 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -471,21 +471,7 @@ static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewT
       break;
     case LLVMContext::MD_nonnull:
-      // This only directly applies if the new type is also a pointer.
-      if (NewTy->isPointerTy()) {
-        NewLoad->setMetadata(ID, N);
-        break;
-      }
-      // If it's integral now, translate it to !range metadata.
-      if (NewTy->isIntegerTy()) {
-        auto *ITy = cast<IntegerType>(NewTy);
-        auto *NullInt = ConstantExpr::getPtrToInt(
-            ConstantPointerNull::get(cast<PointerType>(Ptr->getType())), ITy);
-        auto *NonNullInt =
-            ConstantExpr::getAdd(NullInt, ConstantInt::get(ITy, 1));
-        NewLoad->setMetadata(LLVMContext::MD_range,
-                             MDB.createRange(NonNullInt, NullInt));
-      }
+      copyNonnullMetadata(LI, N, *NewLoad);
       break;
     case LLVMContext::MD_align:
     case LLVMContext::MD_dereferenceable:
@@ -495,17 +481,7 @@ static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewT
       NewLoad->setMetadata(ID, N);
       break;
     case LLVMContext::MD_range:
-      // FIXME: It would be nice to propagate this in some way, but the type
-      // conversions make it hard.
-
-      // If it's a pointer now and the range does not contain 0, make it !nonnull.
-      if (NewTy->isPointerTy()) {
-        unsigned BitWidth = IC.getDataLayout().getTypeSizeInBits(NewTy);
-        if (!getConstantRangeFromMetadata(*N).contains(APInt(BitWidth, 0))) {
-          MDNode *NN = MDNode::get(LI.getContext(), None);
-          NewLoad->setMetadata(LLVMContext::MD_nonnull, NN);
-        }
-      }
+      copyRangeMetadata(IC.getDataLayout(), LI, N, *NewLoad);
       break;
     }
   }
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 6e4174aa0cda..f877264a6ec9 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -26,6 +26,7 @@
 #include "llvm/Analysis/LazyValueInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/CFG.h"
+#include "llvm/IR/ConstantRange.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DIBuilder.h"
 #include "llvm/IR/DataLayout.h"
@@ -1069,7 +1070,7 @@ static bool LdStHasDebugValue(DILocalVariable *DIVar, DIExpression *DIExpr,
 }
 
 /// See if there is a dbg.value intrinsic for DIVar for the PHI node.
-static bool PhiHasDebugValue(DILocalVariable *DIVar, 
+static bool PhiHasDebugValue(DILocalVariable *DIVar,
                              DIExpression *DIExpr,
                              PHINode *APN) {
   // Since we can't guarantee that the original dbg.declare instrinsic
@@ -1152,7 +1153,7 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
     DbgValue->insertAfter(LI);
 }
 
-/// Inserts a llvm.dbg.value intrinsic after a phi 
+/// Inserts a llvm.dbg.value intrinsic after a phi
 /// that has an associated llvm.dbg.decl intrinsic.
 void llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, PHINode *APN,
                                            DIBuilder &Builder) {
@@ -1723,12 +1724,12 @@ void llvm::combineMetadata(Instruction *K, const Instruction *J,
       // Preserve !invariant.group in K.
       break;
     case LLVMContext::MD_align:
-      K->setMetadata(Kind, 
+      K->setMetadata(Kind,
         MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD));
       break;
     case LLVMContext::MD_dereferenceable:
     case LLVMContext::MD_dereferenceable_or_null:
-      K->setMetadata(Kind, 
+      K->setMetadata(Kind,
         MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD));
       break;
     }
@@ -1812,6 +1813,49 @@ bool llvm::callsGCLeafFunction(ImmutableCallSite CS) {
   return false;
 }
 
+void llvm::copyNonnullMetadata(const LoadInst &OldLI, MDNode *N,
+                               LoadInst &NewLI) {
+  auto *NewTy = NewLI.getType();
+
+  // This only directly applies if the new type is also a pointer.
+  if (NewTy->isPointerTy()) {
+    NewLI.setMetadata(LLVMContext::MD_nonnull, N);
+    return;
+  }
+
+  // The only other translation we can do is to integral loads with !range
+  // metadata.
+  if (!NewTy->isIntegerTy())
+    return;
+
+  MDBuilder MDB(NewLI.getContext());
+  const Value *Ptr = OldLI.getPointerOperand();
+  auto *ITy = cast<IntegerType>(NewTy);
+  auto *NullInt = ConstantExpr::getPtrToInt(
+      ConstantPointerNull::get(cast<PointerType>(Ptr->getType())), ITy);
+  auto *NonNullInt = ConstantExpr::getAdd(NullInt, ConstantInt::get(ITy, 1));
+  NewLI.setMetadata(LLVMContext::MD_range,
+                    MDB.createRange(NonNullInt, NullInt));
+}
+
+void llvm::copyRangeMetadata(const DataLayout &DL, const LoadInst &OldLI,
+                             MDNode *N, LoadInst &NewLI) {
+  auto *NewTy = NewLI.getType();
+
+  // Give up unless it is converted to a pointer where there is a single very
+  // valuable mapping we can do reliably.
+  // FIXME: It would be nice to propagate this in more ways, but the type
+  // conversions make it hard.
+  if (!NewTy->isPointerTy())
+    return;
+
+  unsigned BitWidth = DL.getTypeSizeInBits(NewTy);
+  if (!getConstantRangeFromMetadata(*N).contains(APInt(BitWidth, 0))) {
+    MDNode *NN = MDNode::get(OldLI.getContext(), None);
+    NewLI.setMetadata(LLVMContext::MD_nonnull, NN);
+  }
+}
+
 namespace {
 /// A potential constituent of a bitreverse or bswap expression. See
 /// collectBitParts for a fuller explanation.
@@ -1933,7 +1977,7 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals,
       unsigned NumMaskedBits = AndMask.countPopulation();
       if (!MatchBitReversals && NumMaskedBits % 8 != 0)
         return Result;
-      
+
       auto &Res =
           collectBitParts(I->getOperand(0), MatchBSwaps, MatchBitReversals, BPS);
       if (!Res)

From d45f4ff7d413f3e72fcd7f7f2efd31a299e75350 Mon Sep 17 00:00:00 2001
From: Chandler Carruth
Date: Tue, 27 Jun 2017 02:23:15 +0000
Subject: [PATCH 3/5] [SROA] Clean up a test case a bit prior to adding more
 testing for nonnull as part of fixing PR32902.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@306353 91177308-0d34-0410-b5e6-96231b3b80d8
---
 test/Transforms/SROA/preserve-nonnull.ll | 28 +++++++++++-------------
 1 file changed, 13 insertions(+), 15 deletions(-)

diff --git a/test/Transforms/SROA/preserve-nonnull.ll b/test/Transforms/SROA/preserve-nonnull.ll
index fc5ce6a445fa..f85104dfad7d 100644
--- a/test/Transforms/SROA/preserve-nonnull.ll
+++ b/test/Transforms/SROA/preserve-nonnull.ll
@@ -3,22 +3,20 @@
 ; Make sure that SROA doesn't lose nonnull metadata
 ; on loads from allocas that get optimized out.
-; CHECK-LABEL: define float* @yummy_nonnull
-; CHECK: [[RETURN:%(.*)]] = load float*, float** %arg, align 8
-; CHECK: [[ASSUME:%(.*)]] = icmp ne float* {{.*}}[[RETURN]], null
-; CHECK: call void @llvm.assume(i1 {{.*}}[[ASSUME]])
-; CHECK: ret float* {{.*}}[[RETURN]]
-
 define float* @yummy_nonnull(float** %arg) {
-entry-block:
-  %buf = alloca float*
-
-  %_arg_i8 = bitcast float** %arg to i8*
-  %_buf_i8 = bitcast float** %buf to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %_buf_i8, i8* %_arg_i8, i64 8, i32 8, i1 false)
-
-  %ret = load float*, float** %buf, align 8, !nonnull !0
-  ret float* %ret
+; CHECK-LABEL: define float* @yummy_nonnull(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    %[[RETURN:.*]] = load float*, float** %arg, align 8
+; CHECK-NEXT:    %[[ASSUME:.*]] = icmp ne float* %[[RETURN]], null
+; CHECK-NEXT:    call void @llvm.assume(i1 %[[ASSUME]])
+; CHECK-NEXT:    ret float* %[[RETURN]]
+entry:
+  %buf = alloca float*
+  %_arg_i8 = bitcast float** %arg to i8*
+  %_buf_i8 = bitcast float** %buf to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %_buf_i8, i8* %_arg_i8, i64 8, i32 8, i1 false)
+  %ret = load float*, float** %buf, align 8, !nonnull !0
+  ret float* %ret
 }
 
 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)

From 70e9bbd933bee76359eb18aa333f2630c72dee4b Mon Sep 17 00:00:00 2001
From: Chandler Carruth
Date: Tue, 27 Jun 2017 03:08:45 +0000
Subject: [PATCH 4/5] [SROA] Further test cleanup and add a test for the
 actual propagation of the nonnull attribute distinct from rewriting it into
 an assume.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@306358 91177308-0d34-0410-b5e6-96231b3b80d8
---
 test/Transforms/SROA/preserve-nonnull.ll | 29 ++++++++++++++++++++----
 1 file changed, 25 insertions(+), 4 deletions(-)

diff --git a/test/Transforms/SROA/preserve-nonnull.ll b/test/Transforms/SROA/preserve-nonnull.ll
index f85104dfad7d..1ff8bec3af91 100644
--- a/test/Transforms/SROA/preserve-nonnull.ll
+++ b/test/Transforms/SROA/preserve-nonnull.ll
@@ -3,8 +3,31 @@
 ; Make sure that SROA doesn't lose nonnull metadata
 ; on loads from allocas that get optimized out.
 
-define float* @yummy_nonnull(float** %arg) {
-; CHECK-LABEL: define float* @yummy_nonnull(
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)
+
+; Check that we do basic propagation of nonnull when rewriting.
+define i8* @propagate_nonnull(i32* %v) {
+; CHECK-LABEL: define i8* @propagate_nonnull(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    %[[A:.*]] = alloca i8*
+; CHECK-NEXT:    %[[V_CAST:.*]] = bitcast i32* %v to i8*
+; CHECK-NEXT:    store i8* %[[V_CAST]], i8** %[[A]]
+; CHECK-NEXT:    %[[LOAD:.*]] = load volatile i8*, i8** %[[A]], !nonnull !0
+; CHECK-NEXT:    ret i8* %[[LOAD]]
+entry:
+  %a = alloca [2 x i8*]
+  %a.gep0 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 0
+  %a.gep1 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 1
+  %a.gep0.cast = bitcast i8** %a.gep0 to i32**
+  %a.gep1.cast = bitcast i8** %a.gep1 to i32**
+  store i32* %v, i32** %a.gep1.cast
+  store i32* null, i32** %a.gep0.cast
+  %load = load volatile i8*, i8** %a.gep1, !nonnull !0
+  ret i8* %load
+}
+
+define float* @turn_nonnull_into_assume(float** %arg) {
+; CHECK-LABEL: define float* @turn_nonnull_into_assume(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    %[[RETURN:.*]] = load float*, float** %arg, align 8
 ; CHECK-NEXT:    %[[ASSUME:.*]] = icmp ne float* %[[RETURN]], null
@@ -19,6 +42,4 @@ entry:
   ret float* %ret
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)
-
 !0 = !{}

From b514eb0298959689758296b50c721ac5c64fd80c Mon Sep 17 00:00:00 2001
From: Chandler Carruth
Date: Tue, 27 Jun 2017 08:32:03 +0000
Subject: [PATCH 5/5] [SROA] Fix PR32902 by more carefully propagating
 !nonnull metadata.

This is based heavily on the work done in D34285. I mostly wanted to do
test cleanup for the author to save them some time, but I had a really
hard time understanding why it was so hard to write better test cases
for these issues.

The problem is that because SROA does a second rewrite of the loads and
because we *don't* propagate !nonnull for non-pointer loads, we first
introduced invalid !nonnull metadata and then stripped it back off just
in time to avoid most ways of this PR manifesting. Moving to the more
careful utility only fixes this by changing the predicate to look at
the new load's type rather than the target type. However, that *does*
fix the bug, and the utility is much nicer, including adding range
metadata to model the nonnull property after a conversion to an
integer.

However, we have bigger problems because we don't actually propagate
*range* metadata, and the utility to do this extracted from instcombine
isn't really in good shape to do this currently. It *only* handles the
case of copying range metadata from an integer load to a pointer load.
It doesn't even handle the trivial cases of propagating from one
integer load to another when they are the same width!

This utility will need to be beefed up prior to using in this location
to get the metadata to fully survive. And even then, we need to go and
teach things to turn the range metadata into an assume the way we do
with nonnull so that when we *promote* an integer we don't lose the
information.

All of this will require a new test case that looks kind-of like
`preserve-nonnull.ll` does here but focuses on range metadata. It will
also likely require more testing because it needs to correctly handle
changes to the integer width, especially as SROA actively tries to
change the integer width!

Last but not least, I'm a little worried about hooking the range
metadata up here because the instcombine logic for converting from a
range metadata *to* a nonnull metadata node seems broken in the face of
non-zero address spaces where null is not mapped to the integer `0`.
So that probably needs to get fixed with test cases both in SROA and in
instcombine to cover it. But this *does* extract the core PR fix from
D34285 of preventing the !nonnull metadata from being propagated in a
broken state just long enough to feed into promotion and crash value
tracking.

On D34285 there is some discussion of zero-extend handling because it
isn't necessary. First, the new load size covers all of the non-undef
(i.e., possibly initialized) bits. This may even extend past the
original alloca if loading those bits could produce valid data. The
only way it's valid for us to zero-extend an integer load in SROA is if
the original code had a zero extend or those bits were undef. And we
get to assume things like undef *never* satisfies nonnull, so non-undef
bits can participate here. No need to special case the zero-extend
handling, it just falls out correctly.

The original credit goes to Ariel Ben-Yehuda! I'm mostly landing this
to save a few rounds of trivial edits fixing style issues and test case
formulation.

Differential Revision: D34285

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@306379 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Scalar/SROA.cpp           | 15 +++++++-
 test/Transforms/SROA/preserve-nonnull.ll | 47 ++++++++++++++++++++++++
 2 files changed, 60 insertions(+), 2 deletions(-)

diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index 3ed2a51febaf..33f97c28a253 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -2388,9 +2388,20 @@ class llvm::sroa::AllocaSliceRewriter
       if (LI.isVolatile())
         NewLI->setAtomic(LI.getOrdering(), LI.getSynchScope());
 
+      // Any !nonnull metadata or !range metadata on the old load is also valid
+      // on the new load. This is true in some cases even when the loads are
+      // different types, for example by mapping !nonnull metadata to
+      // !range metadata by modeling the null pointer constant converted to the
+      // integer type.
+      // FIXME: Add support for range metadata here. Currently the utilities
+      // for this don't propagate range metadata in trivial cases from one
+      // integer load to another, don't handle non-addrspace-0 null pointers
+      // correctly, and don't have any support for mapping ranges as the
+      // integer type becomes wider or narrower.
+      if (MDNode *N = LI.getMetadata(LLVMContext::MD_nonnull))
+        copyNonnullMetadata(LI, N, *NewLI);
+
       // Try to preserve nonnull metadata
-      if (TargetTy->isPointerTy())
-        NewLI->copyMetadata(LI, LLVMContext::MD_nonnull);
       V = NewLI;
 
       // If this is an integer load past the end of the slice (which means the
diff --git a/test/Transforms/SROA/preserve-nonnull.ll b/test/Transforms/SROA/preserve-nonnull.ll
index 1ff8bec3af91..a29da6dc2c37 100644
--- a/test/Transforms/SROA/preserve-nonnull.ll
+++ b/test/Transforms/SROA/preserve-nonnull.ll
@@ -42,4 +42,51 @@ entry:
   ret float* %ret
 }
 
+; Make sure we properly handle the !nonnull attribute when we convert
+; a pointer load to an integer load.
+; FIXME: While this doesn't do anything actively harmful today, it really
+; should propagate the !nonnull metadata to range metadata. The irony is, it
+; *does* initially, but then we lose that !range metadata before we finish
+; SROA.
+define i8* @propagate_nonnull_to_int() {
+; CHECK-LABEL: define i8* @propagate_nonnull_to_int(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    %[[A:.*]] = alloca i64
+; CHECK-NEXT:    store i64 42, i64* %[[A]]
+; CHECK-NEXT:    %[[LOAD:.*]] = load volatile i64, i64* %[[A]]
+; CHECK-NEXT:    %[[CAST:.*]] = inttoptr i64 %[[LOAD]] to i8*
+; CHECK-NEXT:    ret i8* %[[CAST]]
+entry:
+  %a = alloca [2 x i8*]
+  %a.gep0 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 0
+  %a.gep1 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 1
+  %a.gep0.cast = bitcast i8** %a.gep0 to i64*
+  %a.gep1.cast = bitcast i8** %a.gep1 to i64*
+  store i64 42, i64* %a.gep1.cast
+  store i64 0, i64* %a.gep0.cast
+  %load = load volatile i8*, i8** %a.gep1, !nonnull !0
+  ret i8* %load
+}
+
+; Make sure we properly handle the !nonnull attribute when we convert
+; a pointer load to an integer load and immediately promote it to an SSA
+; register. This can fail in interesting ways due to the rewrite iteration of
+; SROA, resulting in PR32902.
+define i8* @propagate_nonnull_to_int_and_promote() {
+; CHECK-LABEL: define i8* @propagate_nonnull_to_int_and_promote(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    %[[PROMOTED_VALUE:.*]] = inttoptr i64 42 to i8*
+; CHECK-NEXT:    ret i8* %[[PROMOTED_VALUE]]
+entry:
+  %a = alloca [2 x i8*], align 8
+  %a.gep0 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 0
+  %a.gep1 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 1
+  %a.gep0.cast = bitcast i8** %a.gep0 to i64*
+  %a.gep1.cast = bitcast i8** %a.gep1 to i64*
+  store i64 42, i64* %a.gep1.cast
+  store i64 0, i64* %a.gep0.cast
+  %load = load i8*, i8** %a.gep1, align 8, !nonnull !0
+  ret i8* %load
+}
+
 !0 = !{}
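
As an illustration of the nonnull-to-range mapping that copyNonnullMetadata
performs when a pointer load is rewritten as an integer load (a minimal
sketch, not taken from the patches above; the value names and the 64-bit
pointer width are assumptions for the example):

    ; Before rewriting: a pointer load known to be nonnull.
    %p = load i8*, i8** %slot, !nonnull !0

    ; After rewriting to an i64 load, the same fact survives as !range
    ; metadata. MDB.createRange(NonNullInt, NullInt) produces the wrapped
    ; half-open interval [1, 0), which covers every i64 value except 0.
    %i = load i64, i64* %slot.cast, !range !1

    !0 = !{}
    !1 = !{i64 1, i64 0}

This is also why the non-addrspace-0 concern in the PATCH 5 message matters:
the mapping is only correct when the null pointer converts to the integer 0.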