Bug#879016: llvm-toolchain-4.0: Please backport these upstream patches to fix two failing tests for rustc

Ximin Luo infinity0 at debian.org
Wed Oct 18 14:06:00 UTC 2017


Ximin Luo:
> Please apply the attached debdiff, [..]
Whoops, here it is again, rebased against the latest sid version, 1:4.0.1-7.

X

-- 
GPG: ed25519/56034877E1F87C35
GPG: rsa4096/1318EFAC5FBBDBCE
https://github.com/infinity0/pubkeys.git
-------------- next part --------------
diff -Nru llvm-toolchain-4.0-4.0.1/debian/changelog llvm-toolchain-4.0-4.0.1/debian/changelog
--- llvm-toolchain-4.0-4.0.1/debian/changelog	2017-10-13 21:22:54.000000000 +0200
+++ llvm-toolchain-4.0-4.0.1/debian/changelog	2017-10-18 15:28:20.000000000 +0200
@@ -1,3 +1,11 @@
+llvm-toolchain-4.0 (1:4.0.1-7.1) UNRELEASED; urgency=medium
+
+  * Non-maintainer upload.
+  * Backport some patches (originally from rust, and upstreamed) to fix two
+    failing tests in rustc.
+
+ -- Ximin Luo <infinity0 at debian.org>  Wed, 18 Oct 2017 15:28:20 +0200
+
 llvm-toolchain-4.0 (1:4.0.1-7) unstable; urgency=medium
 
   * Force the deactivation of ocaml until the transition is done
diff -Nru llvm-toolchain-4.0-4.0.1/debian/patches/0011-SimplifyCFG-Hoisting-invalidates-metadata.patch llvm-toolchain-4.0-4.0.1/debian/patches/0011-SimplifyCFG-Hoisting-invalidates-metadata.patch
--- llvm-toolchain-4.0-4.0.1/debian/patches/0011-SimplifyCFG-Hoisting-invalidates-metadata.patch	2017-04-16 19:13:22.000000000 +0200
+++ llvm-toolchain-4.0-4.0.1/debian/patches/0011-SimplifyCFG-Hoisting-invalidates-metadata.patch	1970-01-01 01:00:00.000000000 +0100
@@ -1,84 +0,0 @@
-From eee68eafa7e8e4ce996b49f5551636639a6c331a Mon Sep 17 00:00:00 2001
-From: David Majnemer <david.majnemer at gmail.com>
-Date: Mon, 29 Aug 2016 17:14:08 +0000
-Subject: [PATCH 11/17] [SimplifyCFG] Hoisting invalidates metadata
-
-We forgot to remove optimization metadata when performing hosting during
-FoldTwoEntryPHINode.
-
-This fixes PR29163.
-
-git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@279980 91177308-0d34-0410-b5e6-96231b3b80d8
----
- lib/Transforms/Utils/SimplifyCFG.cpp   | 10 ++++++++--
- test/Transforms/SimplifyCFG/PR29163.ll | 31 +++++++++++++++++++++++++++++++
- 2 files changed, 39 insertions(+), 2 deletions(-)
- create mode 100644 test/Transforms/SimplifyCFG/PR29163.ll
-
-diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
-index 0504646..c197317 100644
---- a/lib/Transforms/Utils/SimplifyCFG.cpp
-+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
-@@ -2024,14 +2024,20 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
- 
-   // Move all 'aggressive' instructions, which are defined in the
-   // conditional parts of the if's up to the dominating block.
--  if (IfBlock1)
-+  if (IfBlock1) {
-+    for (auto &I : *IfBlock1)
-+      I.dropUnknownNonDebugMetadata();
-     DomBlock->getInstList().splice(InsertPt->getIterator(),
-                                    IfBlock1->getInstList(), IfBlock1->begin(),
-                                    IfBlock1->getTerminator()->getIterator());
--  if (IfBlock2)
-+  }
-+  if (IfBlock2) {
-+    for (auto &I : *IfBlock2)
-+      I.dropUnknownNonDebugMetadata();
-     DomBlock->getInstList().splice(InsertPt->getIterator(),
-                                    IfBlock2->getInstList(), IfBlock2->begin(),
-                                    IfBlock2->getTerminator()->getIterator());
-+  }
- 
-   while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
-     // Change the PHI node into a select instruction.
-diff --git a/test/Transforms/SimplifyCFG/PR29163.ll b/test/Transforms/SimplifyCFG/PR29163.ll
-new file mode 100644
-index 0000000..65f9090
---- /dev/null
-+++ b/test/Transforms/SimplifyCFG/PR29163.ll
-@@ -0,0 +1,31 @@
-+; RUN: opt -S -simplifycfg < %s | FileCheck %s
-+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-+target triple = "x86_64-unknown-linux-gnu"
-+
-+ at GV = external constant i64*
-+
-+define i64* @test1(i1 %cond, i8* %P) {
-+entry:
-+  br i1 %cond, label %if, label %then
-+
-+then:
-+  %bc = bitcast i8* %P to i64*
-+  br label %join
-+
-+if:
-+  %load = load i64*, i64** @GV, align 8, !dereferenceable !0
-+  br label %join
-+
-+join:
-+  %phi = phi i64* [ %bc, %then ], [ %load, %if ]
-+  ret i64* %phi
-+}
-+
-+; CHECK-LABEL: define i64* @test1(
-+; CHECK: %[[bc:.*]] = bitcast i8* %P to i64*
-+; CHECK: %[[load:.*]] = load i64*, i64** @GV, align 8{{$}}
-+; CHECK: %[[phi:.*]] = select i1 %cond, i64* %[[load]], i64* %[[bc]]
-+; CHECK: ret i64* %[[phi]]
-+
-+
-+!0 = !{i64 8}
--- 
-2.10.1
-
diff -Nru llvm-toolchain-4.0-4.0.1/debian/patches/rL298540.diff llvm-toolchain-4.0-4.0.1/debian/patches/rL298540.diff
--- llvm-toolchain-4.0-4.0.1/debian/patches/rL298540.diff	1970-01-01 01:00:00.000000000 +0100
+++ llvm-toolchain-4.0-4.0.1/debian/patches/rL298540.diff	2017-10-18 15:28:20.000000000 +0200
@@ -0,0 +1,292 @@
+commit 2b66aeed20b61d42cd2782b94d5419c6faca49a1
+Author: Luqman Aden <me at luqman.ca>
+Date:   Wed Mar 22 19:16:39 2017 +0000
+
+    Preserve nonnull metadata on Loads through SROA & mem2reg.
+    
+    Summary:
+    https://llvm.org/bugs/show_bug.cgi?id=31142 :
+    
+    SROA was dropping the nonnull metadata on loads from allocas that got optimized out. This patch simply preserves nonnull metadata on loads through SROA and mem2reg.
+    
+    Reviewers: chandlerc, efriedma
+    
+    Reviewed By: efriedma
+    
+    Subscribers: hfinkel, spatel, efriedma, arielb1, davide, llvm-commits
+    
+    Differential Revision: https://reviews.llvm.org/D27114
+    
+    git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@298540 91177308-0d34-0410-b5e6-96231b3b80d8
+
+--- a/lib/Transforms/Scalar/SROA.cpp
++++ b/lib/Transforms/Scalar/SROA.cpp
+@@ -2387,6 +2387,10 @@
+                                               LI.isVolatile(), LI.getName());
+       if (LI.isVolatile())
+         NewLI->setAtomic(LI.getOrdering(), LI.getSynchScope());
++
++      // Try to preserve nonnull metadata
++      if (TargetTy->isPointerTy())
++        NewLI->copyMetadata(LI, LLVMContext::MD_nonnull);
+       V = NewLI;
+ 
+       // If this is an integer load past the end of the slice (which means the
+--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
++++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+@@ -15,7 +15,6 @@
+ //
+ //===----------------------------------------------------------------------===//
+ 
+-#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+ #include "llvm/ADT/ArrayRef.h"
+ #include "llvm/ADT/DenseMap.h"
+ #include "llvm/ADT/STLExtras.h"
+@@ -23,6 +22,7 @@
+ #include "llvm/ADT/SmallVector.h"
+ #include "llvm/ADT/Statistic.h"
+ #include "llvm/Analysis/AliasSetTracker.h"
++#include "llvm/Analysis/AssumptionCache.h"
+ #include "llvm/Analysis/InstructionSimplify.h"
+ #include "llvm/Analysis/IteratedDominanceFrontier.h"
+ #include "llvm/Analysis/ValueTracking.h"
+@@ -38,6 +38,7 @@
+ #include "llvm/IR/Metadata.h"
+ #include "llvm/IR/Module.h"
+ #include "llvm/Transforms/Utils/Local.h"
++#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+ #include <algorithm>
+ using namespace llvm;
+ 
+@@ -301,6 +302,18 @@
+ 
+ } // end of anonymous namespace
+ 
++/// Given a LoadInst LI this adds assume(LI != null) after it.
++static void addAssumeNonNull(AssumptionCache *AC, LoadInst *LI) {
++  Function *AssumeIntrinsic =
++      Intrinsic::getDeclaration(LI->getModule(), Intrinsic::assume);
++  ICmpInst *LoadNotNull = new ICmpInst(ICmpInst::ICMP_NE, LI,
++                                       Constant::getNullValue(LI->getType()));
++  LoadNotNull->insertAfter(LI);
++  CallInst *CI = CallInst::Create(AssumeIntrinsic, {LoadNotNull});
++  CI->insertAfter(LoadNotNull);
++  AC->registerAssumption(CI);
++}
++
+ static void removeLifetimeIntrinsicUsers(AllocaInst *AI) {
+   // Knowing that this alloca is promotable, we know that it's safe to kill all
+   // instructions except for load and store.
+@@ -334,9 +347,9 @@
+ /// and thus must be phi-ed with undef. We fall back to the standard alloca
+ /// promotion algorithm in that case.
+ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
+-                                     LargeBlockInfo &LBI,
+-                                     DominatorTree &DT,
+-                                     AliasSetTracker *AST) {
++                                     LargeBlockInfo &LBI, DominatorTree &DT,
++                                     AliasSetTracker *AST,
++                                     AssumptionCache *AC) {
+   StoreInst *OnlyStore = Info.OnlyStore;
+   bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0));
+   BasicBlock *StoreBB = OnlyStore->getParent();
+@@ -387,6 +400,14 @@
+     // code.
+     if (ReplVal == LI)
+       ReplVal = UndefValue::get(LI->getType());
++
++    // If the load was marked as nonnull we don't want to lose
++    // that information when we erase this Load. So we preserve
++    // it with an assume.
++    if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
++        !llvm::isKnownNonNullAt(ReplVal, LI, &DT))
++      addAssumeNonNull(AC, LI);
++
+     LI->replaceAllUsesWith(ReplVal);
+     if (AST && LI->getType()->isPointerTy())
+       AST->deleteValue(LI);
+@@ -435,7 +456,9 @@
+ ///  }
+ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
+                                      LargeBlockInfo &LBI,
+-                                     AliasSetTracker *AST) {
++                                     AliasSetTracker *AST,
++                                     DominatorTree &DT,
++                                     AssumptionCache *AC) {
+   // The trickiest case to handle is when we have large blocks. Because of this,
+   // this code is optimized assuming that large blocks happen.  This does not
+   // significantly pessimize the small block case.  This uses LargeBlockInfo to
+@@ -476,10 +499,17 @@
+         // There is no store before this load, bail out (load may be affected
+         // by the following stores - see main comment).
+         return false;
+-    }
+-    else
++    } else {
+       // Otherwise, there was a store before this load, the load takes its value.
+-      LI->replaceAllUsesWith(std::prev(I)->second->getOperand(0));
++      // Note, if the load was marked as nonnull we don't want to lose that
++      // information when we erase it. So we preserve it with an assume.
++      Value *ReplVal = std::prev(I)->second->getOperand(0);
++      if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
++          !llvm::isKnownNonNullAt(ReplVal, LI, &DT))
++        addAssumeNonNull(AC, LI);
++
++      LI->replaceAllUsesWith(ReplVal);
++    }
+ 
+     if (AST && LI->getType()->isPointerTy())
+       AST->deleteValue(LI);
+@@ -553,7 +583,7 @@
+     // If there is only a single store to this value, replace any loads of
+     // it that are directly dominated by the definition with the value stored.
+     if (Info.DefiningBlocks.size() == 1) {
+-      if (rewriteSingleStoreAlloca(AI, Info, LBI, DT, AST)) {
++      if (rewriteSingleStoreAlloca(AI, Info, LBI, DT, AST, AC)) {
+         // The alloca has been processed, move on.
+         RemoveFromAllocasList(AllocaNum);
+         ++NumSingleStore;
+@@ -564,7 +594,7 @@
+     // If the alloca is only read and written in one basic block, just perform a
+     // linear sweep over the block to eliminate it.
+     if (Info.OnlyUsedInOneBlock &&
+-        promoteSingleBlockAlloca(AI, Info, LBI, AST)) {
++        promoteSingleBlockAlloca(AI, Info, LBI, AST, DT, AC)) {
+       // The alloca has been processed, move on.
+       RemoveFromAllocasList(AllocaNum);
+       continue;
+@@ -940,6 +970,13 @@
+ 
+       Value *V = IncomingVals[AI->second];
+ 
++      // If the load was marked as nonnull we don't want to lose
++      // that information when we erase this Load. So we preserve
++      // it with an assume.
++      if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
++          !llvm::isKnownNonNullAt(V, LI, &DT))
++        addAssumeNonNull(AC, LI);
++
+       // Anything using the load now uses the current value.
+       LI->replaceAllUsesWith(V);
+       if (AST && LI->getType()->isPointerTy())
+--- /dev/null
++++ b/test/Transforms/Mem2Reg/preserve-nonnull-load-metadata.ll
+@@ -0,0 +1,89 @@
++; RUN: opt < %s -mem2reg -S | FileCheck %s
++
++; This tests that mem2reg preserves the !nonnull metadata on loads
++; from allocas that get optimized out.
++
++; Check the case where the alloca in question has a single store.
++define float* @single_store(float** %arg) {
++; CHECK-LABEL: define float* @single_store
++; CHECK: %arg.load = load float*, float** %arg, align 8
++; CHECK: [[ASSUME:%(.*)]] = icmp ne float* %arg.load, null
++; CHECK: call void @llvm.assume(i1 {{.*}}[[ASSUME]])
++; CHECK: ret float* %arg.load
++entry:
++  %buf = alloca float*
++  %arg.load = load float*, float** %arg, align 8
++  store float* %arg.load, float** %buf, align 8
++  %buf.load = load float*, float **%buf, !nonnull !0
++  ret float* %buf.load
++}
++
++; Check the case where the alloca in question has more than one
++; store but still within one basic block.
++define float* @single_block(float** %arg) {
++; CHECK-LABEL: define float* @single_block
++; CHECK: %arg.load = load float*, float** %arg, align 8
++; CHECK: [[ASSUME:%(.*)]] = icmp ne float* %arg.load, null
++; CHECK: call void @llvm.assume(i1 {{.*}}[[ASSUME]])
++; CHECK: ret float* %arg.load
++entry:
++  %buf = alloca float*
++  %arg.load = load float*, float** %arg, align 8
++  store float* null, float** %buf, align 8
++  store float* %arg.load, float** %buf, align 8
++  %buf.load = load float*, float **%buf, !nonnull !0
++  ret float* %buf.load
++}
++
++; Check the case where the alloca in question has more than one
++; store and also reads ands writes in multiple blocks.
++define float* @multi_block(float** %arg) {
++; CHECK-LABEL: define float* @multi_block
++; CHECK-LABEL: entry:
++; CHECK: %arg.load = load float*, float** %arg, align 8
++; CHECK: br label %next
++; CHECK-LABEL: next:
++; CHECK: [[ASSUME:%(.*)]] = icmp ne float* %arg.load, null
++; CHECK: call void @llvm.assume(i1 {{.*}}[[ASSUME]])
++; CHECK: ret float* %arg.load
++entry:
++  %buf = alloca float*
++  %arg.load = load float*, float** %arg, align 8
++  store float* null, float** %buf, align 8
++  br label %next
++next:
++  store float* %arg.load, float** %buf, align 8
++  %buf.load = load float*, float** %buf, !nonnull !0
++  ret float* %buf.load
++}
++
++; Check that we don't add an assume if it's not
++; necessary i.e. the value is already implied to be nonnull
++define float* @no_assume(float** %arg) {
++; CHECK-LABEL: define float* @no_assume
++; CHECK-LABEL: entry:
++; CHECK: %arg.load = load float*, float** %arg, align 8
++; CHECK: %cn = icmp ne float* %arg.load, null
++; CHECK: br i1 %cn, label %next, label %fin
++; CHECK-LABEL: next:
++; CHECK-NOT: call void @llvm.assume
++; CHECK: ret float* %arg.load
++; CHECK-LABEL: fin:
++; CHECK: ret float* null
++entry:
++  %buf = alloca float*
++  %arg.load = load float*, float** %arg, align 8
++  %cn = icmp ne float* %arg.load, null
++  br i1 %cn, label %next, label %fin
++next:
++; At this point the above nonnull check ensures that
++; the value %arg.load is nonnull in this block and thus
++; we need not add the assume.
++  store float* %arg.load, float** %buf, align 8
++  %buf.load = load float*, float** %buf, !nonnull !0
++  ret float* %buf.load
++fin:
++  ret float* null
++}
++
++!0 = !{}
+--- /dev/null
++++ b/test/Transforms/SROA/preserve-nonnull.ll
+@@ -0,0 +1,26 @@
++; RUN: opt < %s -sroa -S | FileCheck %s
++;
++; Make sure that SROA doesn't lose nonnull metadata
++; on loads from allocas that get optimized out.
++
++; CHECK-LABEL: define float* @yummy_nonnull
++; CHECK: [[RETURN:%(.*)]] = load float*, float** %arg, align 8
++; CHECK: [[ASSUME:%(.*)]] = icmp ne float* {{.*}}[[RETURN]], null
++; CHECK: call void @llvm.assume(i1 {{.*}}[[ASSUME]])
++; CHECK: ret float* {{.*}}[[RETURN]]
++
++define float* @yummy_nonnull(float** %arg) {
++entry-block:
++	%buf = alloca float*
++
++	%_arg_i8 = bitcast float** %arg to i8*
++	%_buf_i8 = bitcast float** %buf to i8*
++	call void @llvm.memcpy.p0i8.p0i8.i64(i8* %_buf_i8, i8* %_arg_i8, i64 8, i32 8, i1 false)
++
++	%ret = load float*, float** %buf, align 8, !nonnull !0
++	ret float* %ret
++}
++
++declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)
++
++!0 = !{}
diff -Nru llvm-toolchain-4.0-4.0.1/debian/patches/rL305193-backport.diff llvm-toolchain-4.0-4.0.1/debian/patches/rL305193-backport.diff
--- llvm-toolchain-4.0-4.0.1/debian/patches/rL305193-backport.diff	1970-01-01 01:00:00.000000000 +0100
+++ llvm-toolchain-4.0-4.0.1/debian/patches/rL305193-backport.diff	2017-10-18 15:28:20.000000000 +0200
@@ -0,0 +1,442 @@
+commit 2b622a393ce80c6157d32a50bf67d6b830729469
+Author: Than McIntosh <thanm at google.com>
+Date:   Mon Jun 12 14:56:02 2017 +0000
+
+    StackColoring: smarter check for slot overlap
+    
+    Summary:
+    The old check for slot overlap treated 2 slots `S` and `T` as
+    overlapping if there existed a CFG node in which both of the slots could
+    possibly be active. That is overly conservative and caused stack blowups
+    in Rust programs. Instead, check whether there is a single CFG node in
+    which both of the slots are possibly active *together*.
+    
+    Fixes PR32488.
+    
+    Patch by Ariel Ben-Yehuda <ariel.byd at gmail.com>
+    
+    Reviewers: thanm, nagisa, llvm-commits, efriedma, rnk
+    
+    Reviewed By: thanm
+    
+    Subscribers: dotdash
+    
+    Differential Revision: https://reviews.llvm.org/D31583
+    
+    git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@305193 91177308-0d34-0410-b5e6-96231b3b80d8
+
+--- a/lib/CodeGen/StackColoring.cpp
++++ b/lib/CodeGen/StackColoring.cpp
+@@ -87,10 +87,134 @@
+ STATISTIC(StackSlotMerged, "Number of stack slot merged.");
+ STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region");
+ 
++//===----------------------------------------------------------------------===//
++//                           StackColoring Pass
++//===----------------------------------------------------------------------===//
++//
++// Stack Coloring reduces stack usage by merging stack slots when they
++// can't be used together. For example, consider the following C program:
++//
++//     void bar(char *, int);
++//     void foo(bool var) {
++//         A: {
++//             char z[4096];
++//             bar(z, 0);
++//         }
++//
++//         char *p;
++//         char x[4096];
++//         char y[4096];
++//         if (var) {
++//             p = x;
++//         } else {
++//             bar(y, 1);
++//             p = y + 1024;
++//         }
++//     B:
++//         bar(p, 2);
++//     }
++//
++// Naively-compiled, this program would use 12k of stack space. However, the
++// stack slot corresponding to `z` is always destroyed before either of the
++// stack slots for `x` or `y` are used, and then `x` is only used if `var`
++// is true, while `y` is only used if `var` is false. So in no time are 2
++// of the stack slots used together, and therefore we can merge them,
++// compiling the function using only a single 4k alloca:
++//
++//     void foo(bool var) { // equivalent
++//         char x[4096];
++//         char *p;
++//         bar(x, 0);
++//         if (var) {
++//             p = x;
++//         } else {
++//             bar(x, 1);
++//             p = x + 1024;
++//         }
++//         bar(p, 2);
++//     }
++//
++// This is an important optimization if we want stack space to be under
++// control in large functions, both open-coded ones and ones created by
++// inlining.
+ //
+ // Implementation Notes:
+ // ---------------------
+ //
++// An important part of the above reasoning is that `z` can't be accessed
++// while the latter 2 calls to `bar` are running. This is justified because
++// `z`'s lifetime is over after we exit from block `A:`, so any further
++// accesses to it would be UB. The way we represent this information
++// in LLVM is by having frontends delimit blocks with `lifetime.start`
++// and `lifetime.end` intrinsics.
++//
++// The effect of these intrinsics seems to be as follows (maybe I should
++// specify this in the reference?):
++//
++//   L1) at start, each stack-slot is marked as *out-of-scope*, unless no
++//   lifetime intrinsic refers to that stack slot, in which case
++//   it is marked as *in-scope*.
++//   L2) on a `lifetime.start`, a stack slot is marked as *in-scope* and
++//   the stack slot is overwritten with `undef`.
++//   L3) on a `lifetime.end`, a stack slot is marked as *out-of-scope*.
++//   L4) on function exit, all stack slots are marked as *out-of-scope*.
++//   L5) `lifetime.end` is a no-op when called on a slot that is already
++//   *out-of-scope*.
++//   L6) memory accesses to *out-of-scope* stack slots are UB.
++//   L7) when a stack-slot is marked as *out-of-scope*, all pointers to it
++//   are invalidated, unless the slot is "degenerate". This is used to
++//   justify not marking slots as in-use until the pointer to them is
++//   used, but feels a bit hacky in the presence of things like LICM. See
++//   the "Degenerate Slots" section for more details.
++//
++// Now, let's ground stack coloring on these rules. We'll define a slot
++// as *in-use* at a (dynamic) point in execution if it either can be
++// written to at that point, or if it has a live and non-undef content
++// at that point.
++//
++// Obviously, slots that are never *in-use* together can be merged, and
++// in our example `foo`, the slots for `x`, `y` and `z` are never
++// in-use together (of course, sometimes slots that *are* in-use together
++// might still be mergable, but we don't care about that here).
++//
++// In this implementation, we successively merge pairs of slots that are
++// not *in-use* together. We could be smarter - for example, we could merge
++// a single large slot with 2 small slots, or we could construct the
++// interference graph and run a "smart" graph coloring algorithm, but with
++// that aside, how do we find out whether a pair of slots might be *in-use*
++// together?
++//
++// From our rules, we see that *out-of-scope* slots are never *in-use*,
++// and from (L7) we see that "non-degenerate" slots remain non-*in-use*
++// until their address is taken. Therefore, we can approximate slot activity
++// using dataflow.
++//
++// A subtle point: naively, we might try to figure out which pairs of
++// stack-slots interfere by propagating `S in-use` through the CFG for every
++// stack-slot `S`, and having `S` and `T` interfere if there is a CFG point in
++// which they are both *in-use*.
++//
++// That is sound, but overly conservative in some cases: in our (artificial)
++// example `foo`, either `x` or `y` might be in use at the label `B:`, but
++// as `x` is only in use if we came in from the `var` edge and `y` only
++// if we came from the `!var` edge, they still can't be in use together.
++// See PR32488 for an important real-life case.
++//
++// If we wanted to find all points of interference precisely, we could
++// propagate `S in-use` and `S&T in-use` predicates through the CFG. That
++// would be precise, but requires propagating `O(n^2)` dataflow facts.
++//
++// However, we aren't interested in the *set* of points of interference
++// between 2 stack slots, only *whether* there *is* such a point. So we
++// can rely on a little trick: for `S` and `T` to be in-use together,
++// one of them needs to become in-use while the other is in-use (or
++// they might both become in use simultaneously). We can check this
++// by also keeping track of the points at which a stack slot might *start*
++// being in-use.
++//
++// Exact first use:
++// ----------------
++//
+ // Consider the following motivating example:
+ //
+ //     int foo() {
+@@ -159,6 +283,9 @@
+ // lifetime, we can additionally overlap b1 and b5, giving us a 3*1024
+ // byte stack (better).
+ //
++// Degenerate Slots:
++// -----------------
++//
+ // Relying entirely on first-use of stack slots is problematic,
+ // however, due to the fact that optimizations can sometimes migrate
+ // uses of a variable outside of its lifetime start/end region. Here
+@@ -238,10 +365,6 @@
+ // for "b" then it will appear that 'b' has a degenerate lifetime.
+ //
+ 
+-//===----------------------------------------------------------------------===//
+-//                           StackColoring Pass
+-//===----------------------------------------------------------------------===//
+-
+ namespace {
+ /// StackColoring - A machine pass for merging disjoint stack allocations,
+ /// marked by the LIFETIME_START and LIFETIME_END pseudo instructions.
+@@ -272,8 +395,11 @@
+   /// Maps basic blocks to a serial number.
+   SmallVector<const MachineBasicBlock*, 8> BasicBlockNumbering;
+ 
+-  /// Maps liveness intervals for each slot.
++  /// Maps slots to their use interval. Outside of this interval, slots
++  /// values are either dead or `undef` and they will not be written to.
+   SmallVector<std::unique_ptr<LiveInterval>, 16> Intervals;
++  /// Maps slots to the points where they can become in-use.
++  SmallVector<SmallVector<SlotIndex, 4>, 16> LiveStarts;
+   /// VNInfo is used for the construction of LiveIntervals.
+   VNInfo::Allocator VNInfoAllocator;
+   /// SlotIndex analysis object.
+@@ -676,15 +802,22 @@
+ 
+ void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
+   SmallVector<SlotIndex, 16> Starts;
+-  SmallVector<SlotIndex, 16> Finishes;
++  SmallVector<bool, 16> DefinitelyInUse;
+ 
+   // For each block, find which slots are active within this block
+   // and update the live intervals.
+   for (const MachineBasicBlock &MBB : *MF) {
+     Starts.clear();
+     Starts.resize(NumSlots);
+-    Finishes.clear();
+-    Finishes.resize(NumSlots);
++    DefinitelyInUse.clear();
++    DefinitelyInUse.resize(NumSlots);
++
++    // Start the interval of the slots that we previously found to be 'in-use'.
++    BlockLifetimeInfo &MBBLiveness = BlockLiveness[&MBB];
++    for (int pos = MBBLiveness.LiveIn.find_first(); pos != -1;
++         pos = MBBLiveness.LiveIn.find_next(pos)) {
++      Starts[pos] = Indexes->getMBBStartIdx(&MBB);
++    }
+ 
+     // Create the interval for the basic blocks containing lifetime begin/end.
+     for (const MachineInstr &MI : MBB) {
+@@ -696,68 +829,35 @@
+       SlotIndex ThisIndex = Indexes->getInstructionIndex(MI);
+       for (auto Slot : slots) {
+         if (IsStart) {
+-          if (!Starts[Slot].isValid() || Starts[Slot] > ThisIndex)
++          // If a slot is already definitely in use, we don't have to emit
++          // a new start marker because there is already a pre-existing
++          // one.
++          if (!DefinitelyInUse[Slot]) {
++            LiveStarts[Slot].push_back(ThisIndex);
++            DefinitelyInUse[Slot] = true;
++          }
++          if (!Starts[Slot].isValid())
+             Starts[Slot] = ThisIndex;
+         } else {
+-          if (!Finishes[Slot].isValid() || Finishes[Slot] < ThisIndex)
+-            Finishes[Slot] = ThisIndex;
++          if (Starts[Slot].isValid()) {
++            VNInfo *VNI = Intervals[Slot]->getValNumInfo(0);
++            Intervals[Slot]->addSegment(
++                LiveInterval::Segment(Starts[Slot], ThisIndex, VNI));
++            Starts[Slot] = SlotIndex(); // Invalidate the start index
++            DefinitelyInUse[Slot] = false;
++          }
+         }
+       }
+     }
+ 
+-    // Create the interval of the blocks that we previously found to be 'alive'.
+-    BlockLifetimeInfo &MBBLiveness = BlockLiveness[&MBB];
+-    for (int pos = MBBLiveness.LiveIn.find_first(); pos != -1;
+-         pos = MBBLiveness.LiveIn.find_next(pos)) {
+-      Starts[pos] = Indexes->getMBBStartIdx(&MBB);
+-    }
+-    for (int pos = MBBLiveness.LiveOut.find_first(); pos != -1;
+-         pos = MBBLiveness.LiveOut.find_next(pos)) {
+-      Finishes[pos] = Indexes->getMBBEndIdx(&MBB);
+-    }
+-
++    // Finish up started segments
+     for (unsigned i = 0; i < NumSlots; ++i) {
+-      //
+-      // When LifetimeStartOnFirstUse is turned on, data flow analysis
+-      // is forward (from starts to ends), not bidirectional. A
+-      // consequence of this is that we can wind up in situations
+-      // where Starts[i] is invalid but Finishes[i] is valid and vice
+-      // versa. Example:
+-      //
+-      //     LIFETIME_START x
+-      //     if (...) {
+-      //       <use of x>
+-      //       throw ...;
+-      //     }
+-      //     LIFETIME_END x
+-      //     return 2;
+-      //
+-      //
+-      // Here the slot for "x" will not be live into the block
+-      // containing the "return 2" (since lifetimes start with first
+-      // use, not at the dominating LIFETIME_START marker).
+-      //
+-      if (Starts[i].isValid() && !Finishes[i].isValid()) {
+-        Finishes[i] = Indexes->getMBBEndIdx(&MBB);
+-      }
+       if (!Starts[i].isValid())
+         continue;
+ 
+-      assert(Starts[i] && Finishes[i] && "Invalid interval");
+-      VNInfo *ValNum = Intervals[i]->getValNumInfo(0);
+-      SlotIndex S = Starts[i];
+-      SlotIndex F = Finishes[i];
+-      if (S < F) {
+-        // We have a single consecutive region.
+-        Intervals[i]->addSegment(LiveInterval::Segment(S, F, ValNum));
+-      } else {
+-        // We have two non-consecutive regions. This happens when
+-        // LIFETIME_START appears after the LIFETIME_END marker.
+-        SlotIndex NewStart = Indexes->getMBBStartIdx(&MBB);
+-        SlotIndex NewFin = Indexes->getMBBEndIdx(&MBB);
+-        Intervals[i]->addSegment(LiveInterval::Segment(NewStart, F, ValNum));
+-        Intervals[i]->addSegment(LiveInterval::Segment(S, NewFin, ValNum));
+-      }
++      SlotIndex EndIdx = Indexes->getMBBEndIdx(&MBB);
++      VNInfo *VNI = Intervals[i]->getValNumInfo(0);
++      Intervals[i]->addSegment(LiveInterval::Segment(Starts[i], EndIdx, VNI));
+     }
+   }
+ }
+@@ -987,6 +1087,7 @@
+   BasicBlockNumbering.clear();
+   Markers.clear();
+   Intervals.clear();
++  LiveStarts.clear();
+   VNInfoAllocator.Reset();
+ 
+   unsigned NumSlots = MFI->getObjectIndexEnd();
+@@ -998,6 +1099,7 @@
+   SmallVector<int, 8> SortedSlots;
+   SortedSlots.reserve(NumSlots);
+   Intervals.reserve(NumSlots);
++  LiveStarts.resize(NumSlots);
+ 
+   unsigned NumMarkers = collectMarkers(NumSlots);
+ 
+@@ -1069,6 +1171,9 @@
+     return MFI->getObjectSize(LHS) > MFI->getObjectSize(RHS);
+   });
+ 
++  for (auto &s : LiveStarts)
++    std::sort(s.begin(), s.end());
++
+   bool Changed = true;
+   while (Changed) {
+     Changed = false;
+@@ -1084,12 +1189,22 @@
+         int SecondSlot = SortedSlots[J];
+         LiveInterval *First = &*Intervals[FirstSlot];
+         LiveInterval *Second = &*Intervals[SecondSlot];
++        auto &FirstS = LiveStarts[FirstSlot];
++        auto &SecondS = LiveStarts[SecondSlot];
+         assert (!First->empty() && !Second->empty() && "Found an empty range");
+ 
+-        // Merge disjoint slots.
+-        if (!First->overlaps(*Second)) {
++        // Merge disjoint slots. This is a little bit tricky - see the
++        // Implementation Notes section for an explanation.
++        if (!First->isLiveAtIndexes(SecondS) &&
++            !Second->isLiveAtIndexes(FirstS)) {
+           Changed = true;
+           First->MergeSegmentsInAsValue(*Second, First->getValNumInfo(0));
++
++          int OldSize = FirstS.size();
++          FirstS.append(SecondS.begin(), SecondS.end());
++          auto Mid = FirstS.begin() + OldSize;
++          std::inplace_merge(FirstS.begin(), Mid, FirstS.end());
++
+           SlotRemap[SecondSlot] = FirstSlot;
+           SortedSlots[J] = -1;
+           DEBUG(dbgs()<<"Merging #"<<FirstSlot<<" and slots #"<<
+--- a/test/CodeGen/X86/StackColoring.ll
++++ b/test/CodeGen/X86/StackColoring.ll
+@@ -582,12 +582,76 @@
+   ret i32 %x.addr.0
+ }
+ 
++;CHECK-LABEL: multi_segment:
++;YESCOLOR: subq  $256, %rsp
++;NOFIRSTUSE: subq  $256, %rsp
++;NOCOLOR: subq  $512, %rsp
++define i1 @multi_segment(i1, i1)
++{
++entry-block:
++  %foo = alloca [32 x i64]
++  %bar = alloca [32 x i64]
++  %foo_i8 = bitcast [32 x i64]* %foo to i8*
++  %bar_i8 = bitcast [32 x i64]* %bar to i8*
++  call void @llvm.lifetime.start.p0i8(i64 256, i8* %bar_i8)
++  call void @baz([32 x i64]* %bar, i32 1)
++  call void @llvm.lifetime.end.p0i8(i64 256, i8* %bar_i8)
++  call void @llvm.lifetime.start.p0i8(i64 256, i8* %foo_i8)
++  call void @baz([32 x i64]* %foo, i32 1)
++  call void @llvm.lifetime.end.p0i8(i64 256, i8* %foo_i8)
++  call void @llvm.lifetime.start.p0i8(i64 256, i8* %bar_i8)
++  call void @baz([32 x i64]* %bar, i32 1)
++  call void @llvm.lifetime.end.p0i8(i64 256, i8* %bar_i8)
++  ret i1 true
++}
++
++;CHECK-LABEL: pr32488:
++;YESCOLOR: subq  $256, %rsp
++;NOFIRSTUSE: subq  $256, %rsp
++;NOCOLOR: subq  $512, %rsp
++define i1 @pr32488(i1, i1)
++{
++entry-block:
++  %foo = alloca [32 x i64]
++  %bar = alloca [32 x i64]
++  %foo_i8 = bitcast [32 x i64]* %foo to i8*
++  %bar_i8 = bitcast [32 x i64]* %bar to i8*
++  br i1 %0, label %if_false, label %if_true
++if_false:
++  call void @llvm.lifetime.start.p0i8(i64 256, i8* %bar_i8)
++  call void @baz([32 x i64]* %bar, i32 0)
++  br i1 %1, label %if_false.1, label %onerr
++if_false.1:
++  call void @llvm.lifetime.end.p0i8(i64 256, i8* %bar_i8)
++  br label %merge
++if_true:
++  call void @llvm.lifetime.start.p0i8(i64 256, i8* %foo_i8)
++  call void @baz([32 x i64]* %foo, i32 1)
++  br i1 %1, label %if_true.1, label %onerr
++if_true.1:
++  call void @llvm.lifetime.end.p0i8(i64 256, i8* %foo_i8)
++  br label %merge
++merge:
++  ret i1 false
++onerr:
++  call void @llvm.lifetime.end.p0i8(i64 256, i8* %foo_i8)
++  call void @llvm.lifetime.end.p0i8(i64 256, i8* %bar_i8)
++  call void @destructor()
++  ret i1 true
++}
++
++%Data = type { [32 x i64] }
++
++declare void @destructor()
++
+ declare void @inita(i32*)
+ 
+ declare void @initb(i32*,i32*,i32*)
+ 
+ declare void @bar([100 x i32]* , [100 x i32]*) nounwind
+ 
++declare void @baz([32 x i64]*, i32)
++
+ declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind
+ 
+ declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind
diff -Nru llvm-toolchain-4.0-4.0.1/debian/patches/rL306267.diff llvm-toolchain-4.0-4.0.1/debian/patches/rL306267.diff
--- llvm-toolchain-4.0-4.0.1/debian/patches/rL306267.diff	1970-01-01 01:00:00.000000000 +0100
+++ llvm-toolchain-4.0-4.0.1/debian/patches/rL306267.diff	2017-10-18 15:28:20.000000000 +0200
@@ -0,0 +1,187 @@
+commit 5a057dc8edbb63887f8c611dd8ddf1b76997f07c
+Author: Chandler Carruth <chandlerc at gmail.com>
+Date:   Mon Jun 26 03:31:31 2017 +0000
+
+    [InstCombine] Factor the logic for propagating !nonnull and !range
+    metadata out of InstCombine and into helpers.
+    
+    NFC, this just exposes the logic used by InstCombine when propagating
+    metadata from one load instruction to another. The plan is to use this
+    in SROA to address PR32902.
+    
+    If anyone has better ideas about how to factor this or name variables,
+    I'm all ears, but this seemed like a pretty good start and lets us make
+    progress on the PR.
+    
+    This is based on a patch by Ariel Ben-Yehuda (D34285).
+    
+    git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@306267 91177308-0d34-0410-b5e6-96231b3b80d8
+
+--- a/include/llvm/Transforms/Utils/Local.h
++++ b/include/llvm/Transforms/Utils/Local.h
+@@ -366,6 +366,19 @@
+ /// during lowering by the GC infrastructure.
+ bool callsGCLeafFunction(ImmutableCallSite CS);
+ 
++/// Copy a nonnull metadata node to a new load instruction.
++///
++/// This handles mapping it to range metadata if the new load is an integer
++/// load instead of a pointer load.
++void copyNonnullMetadata(const LoadInst &OldLI, MDNode *N, LoadInst &NewLI);
++
++/// Copy a range metadata node to a new load instruction.
++///
++/// This handles mapping it to nonnull metadata if the new load is a pointer
++/// load instead of an integer load and the range doesn't cover null.
++void copyRangeMetadata(const DataLayout &DL, const LoadInst &OldLI, MDNode *N,
++                       LoadInst &NewLI);
++
+ //===----------------------------------------------------------------------===//
+ //  Intrinsic pattern matching
+ //
+--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
++++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+@@ -471,21 +471,7 @@
+       break;
+ 
+     case LLVMContext::MD_nonnull:
+-      // This only directly applies if the new type is also a pointer.
+-      if (NewTy->isPointerTy()) {
+-        NewLoad->setMetadata(ID, N);
+-        break;
+-      }
+-      // If it's integral now, translate it to !range metadata.
+-      if (NewTy->isIntegerTy()) {
+-        auto *ITy = cast<IntegerType>(NewTy);
+-        auto *NullInt = ConstantExpr::getPtrToInt(
+-            ConstantPointerNull::get(cast<PointerType>(Ptr->getType())), ITy);
+-        auto *NonNullInt =
+-            ConstantExpr::getAdd(NullInt, ConstantInt::get(ITy, 1));
+-        NewLoad->setMetadata(LLVMContext::MD_range,
+-                             MDB.createRange(NonNullInt, NullInt));
+-      }
++      copyNonnullMetadata(LI, N, *NewLoad);
+       break;
+     case LLVMContext::MD_align:
+     case LLVMContext::MD_dereferenceable:
+@@ -495,17 +481,7 @@
+         NewLoad->setMetadata(ID, N);
+       break;
+     case LLVMContext::MD_range:
+-      // FIXME: It would be nice to propagate this in some way, but the type
+-      // conversions make it hard.
+-
+-      // If it's a pointer now and the range does not contain 0, make it !nonnull.
+-      if (NewTy->isPointerTy()) {
+-        unsigned BitWidth = IC.getDataLayout().getTypeSizeInBits(NewTy);
+-        if (!getConstantRangeFromMetadata(*N).contains(APInt(BitWidth, 0))) {
+-          MDNode *NN = MDNode::get(LI.getContext(), None);
+-          NewLoad->setMetadata(LLVMContext::MD_nonnull, NN);
+-        }
+-      }
++      copyRangeMetadata(IC.getDataLayout(), LI, N, *NewLoad);
+       break;
+     }
+   }
+--- a/lib/Transforms/Utils/Local.cpp
++++ b/lib/Transforms/Utils/Local.cpp
+@@ -26,6 +26,7 @@
+ #include "llvm/Analysis/LazyValueInfo.h"
+ #include "llvm/Analysis/ValueTracking.h"
+ #include "llvm/IR/CFG.h"
++#include "llvm/IR/ConstantRange.h"
+ #include "llvm/IR/Constants.h"
+ #include "llvm/IR/DIBuilder.h"
+ #include "llvm/IR/DataLayout.h"
+@@ -1069,7 +1070,7 @@
+ }
+ 
+ /// See if there is a dbg.value intrinsic for DIVar for the PHI node.
+-static bool PhiHasDebugValue(DILocalVariable *DIVar, 
++static bool PhiHasDebugValue(DILocalVariable *DIVar,
+                              DIExpression *DIExpr,
+                              PHINode *APN) {
+   // Since we can't guarantee that the original dbg.declare instrinsic
+@@ -1152,7 +1153,7 @@
+   DbgValue->insertAfter(LI);
+ }
+ 
+-/// Inserts a llvm.dbg.value intrinsic after a phi 
++/// Inserts a llvm.dbg.value intrinsic after a phi
+ /// that has an associated llvm.dbg.decl intrinsic.
+ void llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
+                                            PHINode *APN, DIBuilder &Builder) {
+@@ -1723,12 +1724,12 @@
+         // Preserve !invariant.group in K.
+         break;
+       case LLVMContext::MD_align:
+-        K->setMetadata(Kind, 
++        K->setMetadata(Kind,
+           MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD));
+         break;
+       case LLVMContext::MD_dereferenceable:
+       case LLVMContext::MD_dereferenceable_or_null:
+-        K->setMetadata(Kind, 
++        K->setMetadata(Kind,
+           MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD));
+         break;
+     }
+@@ -1812,6 +1813,49 @@
+   return false;
+ }
+ 
++void llvm::copyNonnullMetadata(const LoadInst &OldLI, MDNode *N,
++                               LoadInst &NewLI) {
++  auto *NewTy = NewLI.getType();
++
++  // This only directly applies if the new type is also a pointer.
++  if (NewTy->isPointerTy()) {
++    NewLI.setMetadata(LLVMContext::MD_nonnull, N);
++    return;
++  }
++
++  // The only other translation we can do is to integral loads with !range
++  // metadata.
++  if (!NewTy->isIntegerTy())
++    return;
++
++  MDBuilder MDB(NewLI.getContext());
++  const Value *Ptr = OldLI.getPointerOperand();
++  auto *ITy = cast<IntegerType>(NewTy);
++  auto *NullInt = ConstantExpr::getPtrToInt(
++      ConstantPointerNull::get(cast<PointerType>(Ptr->getType())), ITy);
++  auto *NonNullInt = ConstantExpr::getAdd(NullInt, ConstantInt::get(ITy, 1));
++  NewLI.setMetadata(LLVMContext::MD_range,
++                    MDB.createRange(NonNullInt, NullInt));
++}
++
++void llvm::copyRangeMetadata(const DataLayout &DL, const LoadInst &OldLI,
++                             MDNode *N, LoadInst &NewLI) {
++  auto *NewTy = NewLI.getType();
++
++  // Give up unless it is converted to a pointer where there is a single very
++  // valuable mapping we can do reliably.
++  // FIXME: It would be nice to propagate this in more ways, but the type
++  // conversions make it hard.
++  if (!NewTy->isPointerTy())
++    return;
++
++  unsigned BitWidth = DL.getTypeSizeInBits(NewTy);
++  if (!getConstantRangeFromMetadata(*N).contains(APInt(BitWidth, 0))) {
++    MDNode *NN = MDNode::get(OldLI.getContext(), None);
++    NewLI.setMetadata(LLVMContext::MD_nonnull, NN);
++  }
++}
++
+ namespace {
+ /// A potential constituent of a bitreverse or bswap expression. See
+ /// collectBitParts for a fuller explanation.
+@@ -1933,7 +1977,7 @@
+       unsigned NumMaskedBits = AndMask.countPopulation();
+       if (!MatchBitReversals && NumMaskedBits % 8 != 0)
+         return Result;
+-      
++
+       auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps,
+                                   MatchBitReversals, BPS);
+       if (!Res)
diff -Nru llvm-toolchain-4.0-4.0.1/debian/patches/rL306353.diff llvm-toolchain-4.0-4.0.1/debian/patches/rL306353.diff
--- llvm-toolchain-4.0-4.0.1/debian/patches/rL306353.diff	1970-01-01 01:00:00.000000000 +0100
+++ llvm-toolchain-4.0-4.0.1/debian/patches/rL306353.diff	2017-10-18 15:28:20.000000000 +0200
@@ -0,0 +1,47 @@
+commit 477bd758b48cb96477d3dd4cf3b36bf2706e8c1d
+Author: Chandler Carruth <chandlerc at gmail.com>
+Date:   Tue Jun 27 02:23:15 2017 +0000
+
+    [SROA] Clean up a test case a bit prior to adding more testing for
+    nonnull as part of fixing PR32902.
+    
+    git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@306353 91177308-0d34-0410-b5e6-96231b3b80d8
+
+--- a/test/Transforms/SROA/preserve-nonnull.ll
++++ b/test/Transforms/SROA/preserve-nonnull.ll
+@@ -3,22 +3,20 @@
+ ; Make sure that SROA doesn't lose nonnull metadata
+ ; on loads from allocas that get optimized out.
+ 
+-; CHECK-LABEL: define float* @yummy_nonnull
+-; CHECK: [[RETURN:%(.*)]] = load float*, float** %arg, align 8
+-; CHECK: [[ASSUME:%(.*)]] = icmp ne float* {{.*}}[[RETURN]], null
+-; CHECK: call void @llvm.assume(i1 {{.*}}[[ASSUME]])
+-; CHECK: ret float* {{.*}}[[RETURN]]
+-
+ define float* @yummy_nonnull(float** %arg) {
+-entry-block:
+-	%buf = alloca float*
+-
+-	%_arg_i8 = bitcast float** %arg to i8*
+-	%_buf_i8 = bitcast float** %buf to i8*
+-	call void @llvm.memcpy.p0i8.p0i8.i64(i8* %_buf_i8, i8* %_arg_i8, i64 8, i32 8, i1 false)
+-
+-	%ret = load float*, float** %buf, align 8, !nonnull !0
+-	ret float* %ret
++; CHECK-LABEL: define float* @yummy_nonnull(
++; CHECK-NEXT:  entry:
++; CHECK-NEXT:    %[[RETURN:.*]] = load float*, float** %arg, align 8
++; CHECK-NEXT:    %[[ASSUME:.*]] = icmp ne float* %[[RETURN]], null
++; CHECK-NEXT:    call void @llvm.assume(i1 %[[ASSUME]])
++; CHECK-NEXT:    ret float* %[[RETURN]]
++entry:
++  %buf = alloca float*
++  %_arg_i8 = bitcast float** %arg to i8*
++  %_buf_i8 = bitcast float** %buf to i8*
++  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %_buf_i8, i8* %_arg_i8, i64 8, i32 8, i1 false)
++  %ret = load float*, float** %buf, align 8, !nonnull !0
++  ret float* %ret
+ }
+ 
+ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)
diff -Nru llvm-toolchain-4.0-4.0.1/debian/patches/rL306358.diff llvm-toolchain-4.0-4.0.1/debian/patches/rL306358.diff
--- llvm-toolchain-4.0-4.0.1/debian/patches/rL306358.diff	1970-01-01 01:00:00.000000000 +0100
+++ llvm-toolchain-4.0-4.0.1/debian/patches/rL306358.diff	2017-10-18 15:28:20.000000000 +0200
@@ -0,0 +1,52 @@
+commit 156cc49e505986a1659adaa3a0b5a070372377c8
+Author: Chandler Carruth <chandlerc at gmail.com>
+Date:   Tue Jun 27 03:08:45 2017 +0000
+
+    [SROA] Further test cleanup and add a test for the actual propagation of
+    the nonnull attribute distinct from rewriting it into an assume.
+    
+    git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@306358 91177308-0d34-0410-b5e6-96231b3b80d8
+
+--- a/test/Transforms/SROA/preserve-nonnull.ll
++++ b/test/Transforms/SROA/preserve-nonnull.ll
+@@ -3,8 +3,31 @@
+ ; Make sure that SROA doesn't lose nonnull metadata
+ ; on loads from allocas that get optimized out.
+ 
+-define float* @yummy_nonnull(float** %arg) {
+-; CHECK-LABEL: define float* @yummy_nonnull(
++declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)
++
++; Check that we do basic propagation of nonnull when rewriting.
++define i8* @propagate_nonnull(i32* %v) {
++; CHECK-LABEL: define i8* @propagate_nonnull(
++; CHECK-NEXT:  entry:
++; CHECK-NEXT:    %[[A:.*]] = alloca i8*
++; CHECK-NEXT:    %[[V_CAST:.*]] = bitcast i32* %v to i8*
++; CHECK-NEXT:    store i8* %[[V_CAST]], i8** %[[A]]
++; CHECK-NEXT:    %[[LOAD:.*]] = load volatile i8*, i8** %[[A]], !nonnull !0
++; CHECK-NEXT:    ret i8* %[[LOAD]]
++entry:
++  %a = alloca [2 x i8*]
++  %a.gep0 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 0
++  %a.gep1 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 1
++  %a.gep0.cast = bitcast i8** %a.gep0 to i32**
++  %a.gep1.cast = bitcast i8** %a.gep1 to i32**
++  store i32* %v, i32** %a.gep1.cast
++  store i32* null, i32** %a.gep0.cast
++  %load = load volatile i8*, i8** %a.gep1, !nonnull !0
++  ret i8* %load
++}
++
++define float* @turn_nonnull_into_assume(float** %arg) {
++; CHECK-LABEL: define float* @turn_nonnull_into_assume(
+ ; CHECK-NEXT:  entry:
+ ; CHECK-NEXT:    %[[RETURN:.*]] = load float*, float** %arg, align 8
+ ; CHECK-NEXT:    %[[ASSUME:.*]] = icmp ne float* %[[RETURN]], null
+@@ -19,6 +42,4 @@
+   ret float* %ret
+ }
+ 
+-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)
+-
+ !0 = !{}
diff -Nru llvm-toolchain-4.0-4.0.1/debian/patches/rL306379.diff llvm-toolchain-4.0-4.0.1/debian/patches/rL306379.diff
--- llvm-toolchain-4.0-4.0.1/debian/patches/rL306379.diff	1970-01-01 01:00:00.000000000 +0100
+++ llvm-toolchain-4.0-4.0.1/debian/patches/rL306379.diff	2017-10-18 15:28:20.000000000 +0200
@@ -0,0 +1,147 @@
+commit 7df06519765b14e1b08d7034c82c45a0a653eb25
+Author: Chandler Carruth <chandlerc at gmail.com>
+Date:   Tue Jun 27 08:32:03 2017 +0000
+
+    [SROA] Fix PR32902 by more carefully propagating !nonnull metadata.
+    
+    This is based heavily on the work done ni D34285. I mostly wanted to do
+    test cleanup for the author to save them some time, but I had a really
+    hard time understanding why it was so hard to write better test cases
+    for these issues.
+    
+    The problem is that because SROA does a second rewrite of the loads and
+    because we *don't* propagate !nonnull for non-pointer loads, we first
+    introduced invalid !nonnull metadata and then stripped it back off just
+    in time to avoid most ways of this PR manifesting. Moving to the more
+    careful utility only fixes this by changing the predicate to look at the
+    new load's type rather than the target type. However, that *does* fix
+    the bug, and the utility is much nicer including adding range metadata
+    to model the nonnull property after a conversion to an integer.
+    
+    However, we have bigger problems because we don't actually propagate
+    *range* metadata, and the utility to do this extracted from instcombine
+    isn't really in good shape to do this currently. It *only* handles the
+    case of copying range metadata from an integer load to a pointer load.
+    It doesn't even handle the trivial cases of propagating from one integer
+    load to another when they are the same width! This utility will need to
+    be beefed up prior to using in this location to get the metadata to
+    fully survive.
+    
+    And even then, we need to go and teach things to turn the range metadata
+    into an assume the way we do with nonnull so that when we *promote* an
+    integer we don't lose the information.
+    
+    All of this will require a new test case that looks kind-of like
+    `preserve-nonnull.ll` does here but focuses on range metadata. It will
+    also likely require more testing because it needs to correctly handle
+    changes to the integer width, especially as SROA actively tries to
+    change the integer width!
+    
+    Last but not least, I'm a little worried about hooking the range
+    metadata up here because the instcombine logic for converting from
+    a range metadata *to* a nonnull metadata node seems broken in the face
+    of non-zero address spaces where null is not mapped to the integer `0`.
+    So that probably needs to get fixed with test cases both in SROA and in
+    instcombine to cover it.
+    
+    But this *does* extract the core PR fix from D34285 of preventing the
+    !nonnull metadata from being propagated in a broken state just long
+    enough to feed into promotion and crash value tracking.
+    
+    On D34285 there is some discussion of zero-extend handling because it
+    isn't necessary. First, the new load size covers all of the non-undef
+    (ie, possibly initialized) bits. This may even extend past the original
+    alloca if loading those bits could produce valid data. The only way its
+    valid for us to zero-extend an integer load in SROA is if the original
+    code had a zero extend or those bits were undef. And we get to assume
+    things like undef *never* satifies nonnull, so non undef bits can
+    participate here. No need to special case the zero-extend handling, it
+    just falls out correctly.
+    
+    The original credit goes to Ariel Ben-Yehuda! I'm mostly landing this to
+    save a few rounds of trivial edits fixing style issues and test case
+    formulation.
+    
+    Differental Revision: D34285
+    
+    git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@306379 91177308-0d34-0410-b5e6-96231b3b80d8
+
+--- a/lib/Transforms/Scalar/SROA.cpp
++++ b/lib/Transforms/Scalar/SROA.cpp
+@@ -2388,9 +2388,20 @@
+       if (LI.isVolatile())
+         NewLI->setAtomic(LI.getOrdering(), LI.getSynchScope());
+ 
++      // Any !nonnull metadata or !range metadata on the old load is also valid
++      // on the new load. This is even true in some cases even when the loads
++      // are different types, for example by mapping !nonnull metadata to
++      // !range metadata by modeling the null pointer constant converted to the
++      // integer type.
++      // FIXME: Add support for range metadata here. Currently the utilities
++      // for this don't propagate range metadata in trivial cases from one
++      // integer load to another, don't handle non-addrspace-0 null pointers
++      // correctly, and don't have any support for mapping ranges as the
++      // integer type becomes winder or narrower.
++      if (MDNode *N = LI.getMetadata(LLVMContext::MD_nonnull))
++        copyNonnullMetadata(LI, N, *NewLI);
++
+       // Try to preserve nonnull metadata
+-      if (TargetTy->isPointerTy())
+-        NewLI->copyMetadata(LI, LLVMContext::MD_nonnull);
+       V = NewLI;
+ 
+       // If this is an integer load past the end of the slice (which means the
+--- a/test/Transforms/SROA/preserve-nonnull.ll
++++ b/test/Transforms/SROA/preserve-nonnull.ll
+@@ -42,4 +42,51 @@
+   ret float* %ret
+ }
+ 
++; Make sure we properly handle the !nonnull attribute when we convert
++; a pointer load to an integer load.
++; FIXME: While this doesn't do anythnig actively harmful today, it really
++; should propagate the !nonnull metadata to range metadata. The irony is, it
++; *does* initially, but then we lose that !range metadata before we finish
++; SROA.
++define i8* @propagate_nonnull_to_int() {
++; CHECK-LABEL: define i8* @propagate_nonnull_to_int(
++; CHECK-NEXT:  entry:
++; CHECK-NEXT:    %[[A:.*]] = alloca i64
++; CHECK-NEXT:    store i64 42, i64* %[[A]]
++; CHECK-NEXT:    %[[LOAD:.*]] = load volatile i64, i64* %[[A]]
++; CHECK-NEXT:    %[[CAST:.*]] = inttoptr i64 %[[LOAD]] to i8*
++; CHECK-NEXT:    ret i8* %[[CAST]]
++entry:
++  %a = alloca [2 x i8*]
++  %a.gep0 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 0
++  %a.gep1 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 1
++  %a.gep0.cast = bitcast i8** %a.gep0 to i64*
++  %a.gep1.cast = bitcast i8** %a.gep1 to i64*
++  store i64 42, i64* %a.gep1.cast
++  store i64 0, i64* %a.gep0.cast
++  %load = load volatile i8*, i8** %a.gep1, !nonnull !0
++  ret i8* %load
++}
++
++; Make sure we properly handle the !nonnull attribute when we convert
++; a pointer load to an integer load and immediately promote it to an SSA
++; register. This can fail in interesting ways due to the rewrite iteration of
++; SROA, resulting in PR32902.
++define i8* @propagate_nonnull_to_int_and_promote() {
++; CHECK-LABEL: define i8* @propagate_nonnull_to_int_and_promote(
++; CHECK-NEXT:  entry:
++; CHECK-NEXT:    %[[PROMOTED_VALUE:.*]] = inttoptr i64 42 to i8*
++; CHECK-NEXT:    ret i8* %[[PROMOTED_VALUE]]
++entry:
++  %a = alloca [2 x i8*], align 8
++  %a.gep0 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 0
++  %a.gep1 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 1
++  %a.gep0.cast = bitcast i8** %a.gep0 to i64*
++  %a.gep1.cast = bitcast i8** %a.gep1 to i64*
++  store i64 42, i64* %a.gep1.cast
++  store i64 0, i64* %a.gep0.cast
++  %load = load i8*, i8** %a.gep1, align 8, !nonnull !0
++  ret i8* %load
++}
++
+ !0 = !{}
diff -Nru llvm-toolchain-4.0-4.0.1/debian/patches/series llvm-toolchain-4.0-4.0.1/debian/patches/series
--- llvm-toolchain-4.0-4.0.1/debian/patches/series	2017-09-18 21:34:54.000000000 +0200
+++ llvm-toolchain-4.0-4.0.1/debian/patches/series	2017-10-18 15:28:20.000000000 +0200
@@ -38,7 +38,6 @@
 clang-tidy-run-bin.diff
 #bug-30342.diff
 fix-scan-view-path.diff
-#0011-SimplifyCFG-Hoisting-invalidates-metadata.patch
 clang-fix-cmpxchg8-detection-on-i386.patch
 lldb-addversion-suffix-to-llvm-server-exec.patch
 lldb-missing-install.diff
@@ -49,3 +48,13 @@
 ftfbs-gcc.diff
 pr81066.diff
 armhf-bitfield.diff
+# rust LLVM PR84, LLVM PR32488
+# This is actually Rust's backport of upstream RL305193 (which doesn't apply cleanly to LLVM 4)
+# https://github.com/rust-lang/llvm/commit/2b622a393ce
+rL305193-backport.diff
+# rust LLVM PR90, LLVM PR32902, PR31142
+rL298540.diff
+rL306267.diff
+rL306353.diff
+rL306358.diff
+rL306379.diff

