Bug#879016: llvm-toolchain-4.0: Please backport these upstream patches to fix two failing tests for rustc
Ximin Luo
infinity0 at debian.org
Wed Oct 18 14:06:00 UTC 2017
Ximin Luo:
> Please apply the attached debdiff, [..]
Whoops, here it is again, rebased against the latest sid version, 1:4.0.1-7.
X
--
GPG: ed25519/56034877E1F87C35
GPG: rsa4096/1318EFAC5FBBDBCE
https://github.com/infinity0/pubkeys.git
-------------- next part --------------
diff -Nru llvm-toolchain-4.0-4.0.1/debian/changelog llvm-toolchain-4.0-4.0.1/debian/changelog
--- llvm-toolchain-4.0-4.0.1/debian/changelog 2017-10-13 21:22:54.000000000 +0200
+++ llvm-toolchain-4.0-4.0.1/debian/changelog 2017-10-18 15:28:20.000000000 +0200
@@ -1,3 +1,11 @@
+llvm-toolchain-4.0 (1:4.0.1-7.1) UNRELEASED; urgency=medium
+
+ * Non-maintainer upload.
+ * Backport some patches (originally from rust, and upstreamed) to fix two
+ failing tests in rustc.
+
+ -- Ximin Luo <infinity0 at debian.org> Wed, 18 Oct 2017 15:28:20 +0200
+
llvm-toolchain-4.0 (1:4.0.1-7) unstable; urgency=medium
* Force the deactivation of ocaml until the transition is done
diff -Nru llvm-toolchain-4.0-4.0.1/debian/patches/0011-SimplifyCFG-Hoisting-invalidates-metadata.patch llvm-toolchain-4.0-4.0.1/debian/patches/0011-SimplifyCFG-Hoisting-invalidates-metadata.patch
--- llvm-toolchain-4.0-4.0.1/debian/patches/0011-SimplifyCFG-Hoisting-invalidates-metadata.patch 2017-04-16 19:13:22.000000000 +0200
+++ llvm-toolchain-4.0-4.0.1/debian/patches/0011-SimplifyCFG-Hoisting-invalidates-metadata.patch 1970-01-01 01:00:00.000000000 +0100
@@ -1,84 +0,0 @@
-From eee68eafa7e8e4ce996b49f5551636639a6c331a Mon Sep 17 00:00:00 2001
-From: David Majnemer <david.majnemer at gmail.com>
-Date: Mon, 29 Aug 2016 17:14:08 +0000
-Subject: [PATCH 11/17] [SimplifyCFG] Hoisting invalidates metadata
-
-We forgot to remove optimization metadata when performing hosting during
-FoldTwoEntryPHINode.
-
-This fixes PR29163.
-
-git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@279980 91177308-0d34-0410-b5e6-96231b3b80d8
----
- lib/Transforms/Utils/SimplifyCFG.cpp | 10 ++++++++--
- test/Transforms/SimplifyCFG/PR29163.ll | 31 +++++++++++++++++++++++++++++++
- 2 files changed, 39 insertions(+), 2 deletions(-)
- create mode 100644 test/Transforms/SimplifyCFG/PR29163.ll
-
-diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
-index 0504646..c197317 100644
---- a/lib/Transforms/Utils/SimplifyCFG.cpp
-+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
-@@ -2024,14 +2024,20 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
-
- // Move all 'aggressive' instructions, which are defined in the
- // conditional parts of the if's up to the dominating block.
-- if (IfBlock1)
-+ if (IfBlock1) {
-+ for (auto &I : *IfBlock1)
-+ I.dropUnknownNonDebugMetadata();
- DomBlock->getInstList().splice(InsertPt->getIterator(),
- IfBlock1->getInstList(), IfBlock1->begin(),
- IfBlock1->getTerminator()->getIterator());
-- if (IfBlock2)
-+ }
-+ if (IfBlock2) {
-+ for (auto &I : *IfBlock2)
-+ I.dropUnknownNonDebugMetadata();
- DomBlock->getInstList().splice(InsertPt->getIterator(),
- IfBlock2->getInstList(), IfBlock2->begin(),
- IfBlock2->getTerminator()->getIterator());
-+ }
-
- while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
- // Change the PHI node into a select instruction.
-diff --git a/test/Transforms/SimplifyCFG/PR29163.ll b/test/Transforms/SimplifyCFG/PR29163.ll
-new file mode 100644
-index 0000000..65f9090
---- /dev/null
-+++ b/test/Transforms/SimplifyCFG/PR29163.ll
-@@ -0,0 +1,31 @@
-+; RUN: opt -S -simplifycfg < %s | FileCheck %s
-+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-+target triple = "x86_64-unknown-linux-gnu"
-+
-+ at GV = external constant i64*
-+
-+define i64* @test1(i1 %cond, i8* %P) {
-+entry:
-+ br i1 %cond, label %if, label %then
-+
-+then:
-+ %bc = bitcast i8* %P to i64*
-+ br label %join
-+
-+if:
-+ %load = load i64*, i64** @GV, align 8, !dereferenceable !0
-+ br label %join
-+
-+join:
-+ %phi = phi i64* [ %bc, %then ], [ %load, %if ]
-+ ret i64* %phi
-+}
-+
-+; CHECK-LABEL: define i64* @test1(
-+; CHECK: %[[bc:.*]] = bitcast i8* %P to i64*
-+; CHECK: %[[load:.*]] = load i64*, i64** @GV, align 8{{$}}
-+; CHECK: %[[phi:.*]] = select i1 %cond, i64* %[[load]], i64* %[[bc]]
-+; CHECK: ret i64* %[[phi]]
-+
-+
-+!0 = !{i64 8}
---
-2.10.1
-
diff -Nru llvm-toolchain-4.0-4.0.1/debian/patches/rL298540.diff llvm-toolchain-4.0-4.0.1/debian/patches/rL298540.diff
--- llvm-toolchain-4.0-4.0.1/debian/patches/rL298540.diff 1970-01-01 01:00:00.000000000 +0100
+++ llvm-toolchain-4.0-4.0.1/debian/patches/rL298540.diff 2017-10-18 15:28:20.000000000 +0200
@@ -0,0 +1,292 @@
+commit 2b66aeed20b61d42cd2782b94d5419c6faca49a1
+Author: Luqman Aden <me at luqman.ca>
+Date: Wed Mar 22 19:16:39 2017 +0000
+
+ Preserve nonnull metadata on Loads through SROA & mem2reg.
+
+ Summary:
+ https://llvm.org/bugs/show_bug.cgi?id=31142 :
+
+ SROA was dropping the nonnull metadata on loads from allocas that got optimized out. This patch simply preserves nonnull metadata on loads through SROA and mem2reg.
+
+ Reviewers: chandlerc, efriedma
+
+ Reviewed By: efriedma
+
+ Subscribers: hfinkel, spatel, efriedma, arielb1, davide, llvm-commits
+
+ Differential Revision: https://reviews.llvm.org/D27114
+
+ git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@298540 91177308-0d34-0410-b5e6-96231b3b80d8
+
+--- a/lib/Transforms/Scalar/SROA.cpp
++++ b/lib/Transforms/Scalar/SROA.cpp
+@@ -2387,6 +2387,10 @@
+ LI.isVolatile(), LI.getName());
+ if (LI.isVolatile())
+ NewLI->setAtomic(LI.getOrdering(), LI.getSynchScope());
++
++ // Try to preserve nonnull metadata
++ if (TargetTy->isPointerTy())
++ NewLI->copyMetadata(LI, LLVMContext::MD_nonnull);
+ V = NewLI;
+
+ // If this is an integer load past the end of the slice (which means the
+--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
++++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+@@ -15,7 +15,6 @@
+ //
+ //===----------------------------------------------------------------------===//
+
+-#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+ #include "llvm/ADT/ArrayRef.h"
+ #include "llvm/ADT/DenseMap.h"
+ #include "llvm/ADT/STLExtras.h"
+@@ -23,6 +22,7 @@
+ #include "llvm/ADT/SmallVector.h"
+ #include "llvm/ADT/Statistic.h"
+ #include "llvm/Analysis/AliasSetTracker.h"
++#include "llvm/Analysis/AssumptionCache.h"
+ #include "llvm/Analysis/InstructionSimplify.h"
+ #include "llvm/Analysis/IteratedDominanceFrontier.h"
+ #include "llvm/Analysis/ValueTracking.h"
+@@ -38,6 +38,7 @@
+ #include "llvm/IR/Metadata.h"
+ #include "llvm/IR/Module.h"
+ #include "llvm/Transforms/Utils/Local.h"
++#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+ #include <algorithm>
+ using namespace llvm;
+
+@@ -301,6 +302,18 @@
+
+ } // end of anonymous namespace
+
++/// Given a LoadInst LI this adds assume(LI != null) after it.
++static void addAssumeNonNull(AssumptionCache *AC, LoadInst *LI) {
++ Function *AssumeIntrinsic =
++ Intrinsic::getDeclaration(LI->getModule(), Intrinsic::assume);
++ ICmpInst *LoadNotNull = new ICmpInst(ICmpInst::ICMP_NE, LI,
++ Constant::getNullValue(LI->getType()));
++ LoadNotNull->insertAfter(LI);
++ CallInst *CI = CallInst::Create(AssumeIntrinsic, {LoadNotNull});
++ CI->insertAfter(LoadNotNull);
++ AC->registerAssumption(CI);
++}
++
+ static void removeLifetimeIntrinsicUsers(AllocaInst *AI) {
+ // Knowing that this alloca is promotable, we know that it's safe to kill all
+ // instructions except for load and store.
+@@ -334,9 +347,9 @@
+ /// and thus must be phi-ed with undef. We fall back to the standard alloca
+ /// promotion algorithm in that case.
+ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
+- LargeBlockInfo &LBI,
+- DominatorTree &DT,
+- AliasSetTracker *AST) {
++ LargeBlockInfo &LBI, DominatorTree &DT,
++ AliasSetTracker *AST,
++ AssumptionCache *AC) {
+ StoreInst *OnlyStore = Info.OnlyStore;
+ bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0));
+ BasicBlock *StoreBB = OnlyStore->getParent();
+@@ -387,6 +400,14 @@
+ // code.
+ if (ReplVal == LI)
+ ReplVal = UndefValue::get(LI->getType());
++
++ // If the load was marked as nonnull we don't want to lose
++ // that information when we erase this Load. So we preserve
++ // it with an assume.
++ if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
++ !llvm::isKnownNonNullAt(ReplVal, LI, &DT))
++ addAssumeNonNull(AC, LI);
++
+ LI->replaceAllUsesWith(ReplVal);
+ if (AST && LI->getType()->isPointerTy())
+ AST->deleteValue(LI);
+@@ -435,7 +456,9 @@
+ /// }
+ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
+ LargeBlockInfo &LBI,
+- AliasSetTracker *AST) {
++ AliasSetTracker *AST,
++ DominatorTree &DT,
++ AssumptionCache *AC) {
+ // The trickiest case to handle is when we have large blocks. Because of this,
+ // this code is optimized assuming that large blocks happen. This does not
+ // significantly pessimize the small block case. This uses LargeBlockInfo to
+@@ -476,10 +499,17 @@
+ // There is no store before this load, bail out (load may be affected
+ // by the following stores - see main comment).
+ return false;
+- }
+- else
++ } else {
+ // Otherwise, there was a store before this load, the load takes its value.
+- LI->replaceAllUsesWith(std::prev(I)->second->getOperand(0));
++ // Note, if the load was marked as nonnull we don't want to lose that
++ // information when we erase it. So we preserve it with an assume.
++ Value *ReplVal = std::prev(I)->second->getOperand(0);
++ if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
++ !llvm::isKnownNonNullAt(ReplVal, LI, &DT))
++ addAssumeNonNull(AC, LI);
++
++ LI->replaceAllUsesWith(ReplVal);
++ }
+
+ if (AST && LI->getType()->isPointerTy())
+ AST->deleteValue(LI);
+@@ -553,7 +583,7 @@
+ // If there is only a single store to this value, replace any loads of
+ // it that are directly dominated by the definition with the value stored.
+ if (Info.DefiningBlocks.size() == 1) {
+- if (rewriteSingleStoreAlloca(AI, Info, LBI, DT, AST)) {
++ if (rewriteSingleStoreAlloca(AI, Info, LBI, DT, AST, AC)) {
+ // The alloca has been processed, move on.
+ RemoveFromAllocasList(AllocaNum);
+ ++NumSingleStore;
+@@ -564,7 +594,7 @@
+ // If the alloca is only read and written in one basic block, just perform a
+ // linear sweep over the block to eliminate it.
+ if (Info.OnlyUsedInOneBlock &&
+- promoteSingleBlockAlloca(AI, Info, LBI, AST)) {
++ promoteSingleBlockAlloca(AI, Info, LBI, AST, DT, AC)) {
+ // The alloca has been processed, move on.
+ RemoveFromAllocasList(AllocaNum);
+ continue;
+@@ -940,6 +970,13 @@
+
+ Value *V = IncomingVals[AI->second];
+
++ // If the load was marked as nonnull we don't want to lose
++ // that information when we erase this Load. So we preserve
++ // it with an assume.
++ if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
++ !llvm::isKnownNonNullAt(V, LI, &DT))
++ addAssumeNonNull(AC, LI);
++
+ // Anything using the load now uses the current value.
+ LI->replaceAllUsesWith(V);
+ if (AST && LI->getType()->isPointerTy())
+--- /dev/null
++++ b/test/Transforms/Mem2Reg/preserve-nonnull-load-metadata.ll
+@@ -0,0 +1,89 @@
++; RUN: opt < %s -mem2reg -S | FileCheck %s
++
++; This tests that mem2reg preserves the !nonnull metadata on loads
++; from allocas that get optimized out.
++
++; Check the case where the alloca in question has a single store.
++define float* @single_store(float** %arg) {
++; CHECK-LABEL: define float* @single_store
++; CHECK: %arg.load = load float*, float** %arg, align 8
++; CHECK: [[ASSUME:%(.*)]] = icmp ne float* %arg.load, null
++; CHECK: call void @llvm.assume(i1 {{.*}}[[ASSUME]])
++; CHECK: ret float* %arg.load
++entry:
++ %buf = alloca float*
++ %arg.load = load float*, float** %arg, align 8
++ store float* %arg.load, float** %buf, align 8
++ %buf.load = load float*, float **%buf, !nonnull !0
++ ret float* %buf.load
++}
++
++; Check the case where the alloca in question has more than one
++; store but still within one basic block.
++define float* @single_block(float** %arg) {
++; CHECK-LABEL: define float* @single_block
++; CHECK: %arg.load = load float*, float** %arg, align 8
++; CHECK: [[ASSUME:%(.*)]] = icmp ne float* %arg.load, null
++; CHECK: call void @llvm.assume(i1 {{.*}}[[ASSUME]])
++; CHECK: ret float* %arg.load
++entry:
++ %buf = alloca float*
++ %arg.load = load float*, float** %arg, align 8
++ store float* null, float** %buf, align 8
++ store float* %arg.load, float** %buf, align 8
++ %buf.load = load float*, float **%buf, !nonnull !0
++ ret float* %buf.load
++}
++
++; Check the case where the alloca in question has more than one
++; store and also reads ands writes in multiple blocks.
++define float* @multi_block(float** %arg) {
++; CHECK-LABEL: define float* @multi_block
++; CHECK-LABEL: entry:
++; CHECK: %arg.load = load float*, float** %arg, align 8
++; CHECK: br label %next
++; CHECK-LABEL: next:
++; CHECK: [[ASSUME:%(.*)]] = icmp ne float* %arg.load, null
++; CHECK: call void @llvm.assume(i1 {{.*}}[[ASSUME]])
++; CHECK: ret float* %arg.load
++entry:
++ %buf = alloca float*
++ %arg.load = load float*, float** %arg, align 8
++ store float* null, float** %buf, align 8
++ br label %next
++next:
++ store float* %arg.load, float** %buf, align 8
++ %buf.load = load float*, float** %buf, !nonnull !0
++ ret float* %buf.load
++}
++
++; Check that we don't add an assume if it's not
++; necessary i.e. the value is already implied to be nonnull
++define float* @no_assume(float** %arg) {
++; CHECK-LABEL: define float* @no_assume
++; CHECK-LABEL: entry:
++; CHECK: %arg.load = load float*, float** %arg, align 8
++; CHECK: %cn = icmp ne float* %arg.load, null
++; CHECK: br i1 %cn, label %next, label %fin
++; CHECK-LABEL: next:
++; CHECK-NOT: call void @llvm.assume
++; CHECK: ret float* %arg.load
++; CHECK-LABEL: fin:
++; CHECK: ret float* null
++entry:
++ %buf = alloca float*
++ %arg.load = load float*, float** %arg, align 8
++ %cn = icmp ne float* %arg.load, null
++ br i1 %cn, label %next, label %fin
++next:
++; At this point the above nonnull check ensures that
++; the value %arg.load is nonnull in this block and thus
++; we need not add the assume.
++ store float* %arg.load, float** %buf, align 8
++ %buf.load = load float*, float** %buf, !nonnull !0
++ ret float* %buf.load
++fin:
++ ret float* null
++}
++
++!0 = !{}
+--- /dev/null
++++ b/test/Transforms/SROA/preserve-nonnull.ll
+@@ -0,0 +1,26 @@
++; RUN: opt < %s -sroa -S | FileCheck %s
++;
++; Make sure that SROA doesn't lose nonnull metadata
++; on loads from allocas that get optimized out.
++
++; CHECK-LABEL: define float* @yummy_nonnull
++; CHECK: [[RETURN:%(.*)]] = load float*, float** %arg, align 8
++; CHECK: [[ASSUME:%(.*)]] = icmp ne float* {{.*}}[[RETURN]], null
++; CHECK: call void @llvm.assume(i1 {{.*}}[[ASSUME]])
++; CHECK: ret float* {{.*}}[[RETURN]]
++
++define float* @yummy_nonnull(float** %arg) {
++entry-block:
++ %buf = alloca float*
++
++ %_arg_i8 = bitcast float** %arg to i8*
++ %_buf_i8 = bitcast float** %buf to i8*
++ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %_buf_i8, i8* %_arg_i8, i64 8, i32 8, i1 false)
++
++ %ret = load float*, float** %buf, align 8, !nonnull !0
++ ret float* %ret
++}
++
++declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)
++
++!0 = !{}
diff -Nru llvm-toolchain-4.0-4.0.1/debian/patches/rL305193-backport.diff llvm-toolchain-4.0-4.0.1/debian/patches/rL305193-backport.diff
--- llvm-toolchain-4.0-4.0.1/debian/patches/rL305193-backport.diff 1970-01-01 01:00:00.000000000 +0100
+++ llvm-toolchain-4.0-4.0.1/debian/patches/rL305193-backport.diff 2017-10-18 15:28:20.000000000 +0200
@@ -0,0 +1,442 @@
+commit 2b622a393ce80c6157d32a50bf67d6b830729469
+Author: Than McIntosh <thanm at google.com>
+Date: Mon Jun 12 14:56:02 2017 +0000
+
+ StackColoring: smarter check for slot overlap
+
+ Summary:
+ The old check for slot overlap treated 2 slots `S` and `T` as
+ overlapping if there existed a CFG node in which both of the slots could
+ possibly be active. That is overly conservative and caused stack blowups
+ in Rust programs. Instead, check whether there is a single CFG node in
+ which both of the slots are possibly active *together*.
+
+ Fixes PR32488.
+
+ Patch by Ariel Ben-Yehuda <ariel.byd at gmail.com>
+
+ Reviewers: thanm, nagisa, llvm-commits, efriedma, rnk
+
+ Reviewed By: thanm
+
+ Subscribers: dotdash
+
+ Differential Revision: https://reviews.llvm.org/D31583
+
+ git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@305193 91177308-0d34-0410-b5e6-96231b3b80d8
+
+--- a/lib/CodeGen/StackColoring.cpp
++++ b/lib/CodeGen/StackColoring.cpp
+@@ -87,10 +87,134 @@
+ STATISTIC(StackSlotMerged, "Number of stack slot merged.");
+ STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region");
+
++//===----------------------------------------------------------------------===//
++// StackColoring Pass
++//===----------------------------------------------------------------------===//
++//
++// Stack Coloring reduces stack usage by merging stack slots when they
++// can't be used together. For example, consider the following C program:
++//
++// void bar(char *, int);
++// void foo(bool var) {
++// A: {
++// char z[4096];
++// bar(z, 0);
++// }
++//
++// char *p;
++// char x[4096];
++// char y[4096];
++// if (var) {
++// p = x;
++// } else {
++// bar(y, 1);
++// p = y + 1024;
++// }
++// B:
++// bar(p, 2);
++// }
++//
++// Naively-compiled, this program would use 12k of stack space. However, the
++// stack slot corresponding to `z` is always destroyed before either of the
++// stack slots for `x` or `y` are used, and then `x` is only used if `var`
++// is true, while `y` is only used if `var` is false. So in no time are 2
++// of the stack slots used together, and therefore we can merge them,
++// compiling the function using only a single 4k alloca:
++//
++// void foo(bool var) { // equivalent
++// char x[4096];
++// char *p;
++// bar(x, 0);
++// if (var) {
++// p = x;
++// } else {
++// bar(x, 1);
++// p = x + 1024;
++// }
++// bar(p, 2);
++// }
++//
++// This is an important optimization if we want stack space to be under
++// control in large functions, both open-coded ones and ones created by
++// inlining.
+ //
+ // Implementation Notes:
+ // ---------------------
+ //
++// An important part of the above reasoning is that `z` can't be accessed
++// while the latter 2 calls to `bar` are running. This is justified because
++// `z`'s lifetime is over after we exit from block `A:`, so any further
++// accesses to it would be UB. The way we represent this information
++// in LLVM is by having frontends delimit blocks with `lifetime.start`
++// and `lifetime.end` intrinsics.
++//
++// The effect of these intrinsics seems to be as follows (maybe I should
++// specify this in the reference?):
++//
++// L1) at start, each stack-slot is marked as *out-of-scope*, unless no
++// lifetime intrinsic refers to that stack slot, in which case
++// it is marked as *in-scope*.
++// L2) on a `lifetime.start`, a stack slot is marked as *in-scope* and
++// the stack slot is overwritten with `undef`.
++// L3) on a `lifetime.end`, a stack slot is marked as *out-of-scope*.
++// L4) on function exit, all stack slots are marked as *out-of-scope*.
++// L5) `lifetime.end` is a no-op when called on a slot that is already
++// *out-of-scope*.
++// L6) memory accesses to *out-of-scope* stack slots are UB.
++// L7) when a stack-slot is marked as *out-of-scope*, all pointers to it
++// are invalidated, unless the slot is "degenerate". This is used to
++// justify not marking slots as in-use until the pointer to them is
++// used, but feels a bit hacky in the presence of things like LICM. See
++// the "Degenerate Slots" section for more details.
++//
++// Now, let's ground stack coloring on these rules. We'll define a slot
++// as *in-use* at a (dynamic) point in execution if it either can be
++// written to at that point, or if it has a live and non-undef content
++// at that point.
++//
++// Obviously, slots that are never *in-use* together can be merged, and
++// in our example `foo`, the slots for `x`, `y` and `z` are never
++// in-use together (of course, sometimes slots that *are* in-use together
++// might still be mergable, but we don't care about that here).
++//
++// In this implementation, we successively merge pairs of slots that are
++// not *in-use* together. We could be smarter - for example, we could merge
++// a single large slot with 2 small slots, or we could construct the
++// interference graph and run a "smart" graph coloring algorithm, but with
++// that aside, how do we find out whether a pair of slots might be *in-use*
++// together?
++//
++// From our rules, we see that *out-of-scope* slots are never *in-use*,
++// and from (L7) we see that "non-degenerate" slots remain non-*in-use*
++// until their address is taken. Therefore, we can approximate slot activity
++// using dataflow.
++//
++// A subtle point: naively, we might try to figure out which pairs of
++// stack-slots interfere by propagating `S in-use` through the CFG for every
++// stack-slot `S`, and having `S` and `T` interfere if there is a CFG point in
++// which they are both *in-use*.
++//
++// That is sound, but overly conservative in some cases: in our (artificial)
++// example `foo`, either `x` or `y` might be in use at the label `B:`, but
++// as `x` is only in use if we came in from the `var` edge and `y` only
++// if we came from the `!var` edge, they still can't be in use together.
++// See PR32488 for an important real-life case.
++//
++// If we wanted to find all points of interference precisely, we could
++// propagate `S in-use` and `S&T in-use` predicates through the CFG. That
++// would be precise, but requires propagating `O(n^2)` dataflow facts.
++//
++// However, we aren't interested in the *set* of points of interference
++// between 2 stack slots, only *whether* there *is* such a point. So we
++// can rely on a little trick: for `S` and `T` to be in-use together,
++// one of them needs to become in-use while the other is in-use (or
++// they might both become in use simultaneously). We can check this
++// by also keeping track of the points at which a stack slot might *start*
++// being in-use.
++//
++// Exact first use:
++// ----------------
++//
+ // Consider the following motivating example:
+ //
+ // int foo() {
+@@ -159,6 +283,9 @@
+ // lifetime, we can additionally overlap b1 and b5, giving us a 3*1024
+ // byte stack (better).
+ //
++// Degenerate Slots:
++// -----------------
++//
+ // Relying entirely on first-use of stack slots is problematic,
+ // however, due to the fact that optimizations can sometimes migrate
+ // uses of a variable outside of its lifetime start/end region. Here
+@@ -238,10 +365,6 @@
+ // for "b" then it will appear that 'b' has a degenerate lifetime.
+ //
+
+-//===----------------------------------------------------------------------===//
+-// StackColoring Pass
+-//===----------------------------------------------------------------------===//
+-
+ namespace {
+ /// StackColoring - A machine pass for merging disjoint stack allocations,
+ /// marked by the LIFETIME_START and LIFETIME_END pseudo instructions.
+@@ -272,8 +395,11 @@
+ /// Maps basic blocks to a serial number.
+ SmallVector<const MachineBasicBlock*, 8> BasicBlockNumbering;
+
+- /// Maps liveness intervals for each slot.
++ /// Maps slots to their use interval. Outside of this interval, slots
++ /// values are either dead or `undef` and they will not be written to.
+ SmallVector<std::unique_ptr<LiveInterval>, 16> Intervals;
++ /// Maps slots to the points where they can become in-use.
++ SmallVector<SmallVector<SlotIndex, 4>, 16> LiveStarts;
+ /// VNInfo is used for the construction of LiveIntervals.
+ VNInfo::Allocator VNInfoAllocator;
+ /// SlotIndex analysis object.
+@@ -676,15 +802,22 @@
+
+ void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
+ SmallVector<SlotIndex, 16> Starts;
+- SmallVector<SlotIndex, 16> Finishes;
++ SmallVector<bool, 16> DefinitelyInUse;
+
+ // For each block, find which slots are active within this block
+ // and update the live intervals.
+ for (const MachineBasicBlock &MBB : *MF) {
+ Starts.clear();
+ Starts.resize(NumSlots);
+- Finishes.clear();
+- Finishes.resize(NumSlots);
++ DefinitelyInUse.clear();
++ DefinitelyInUse.resize(NumSlots);
++
++ // Start the interval of the slots that we previously found to be 'in-use'.
++ BlockLifetimeInfo &MBBLiveness = BlockLiveness[&MBB];
++ for (int pos = MBBLiveness.LiveIn.find_first(); pos != -1;
++ pos = MBBLiveness.LiveIn.find_next(pos)) {
++ Starts[pos] = Indexes->getMBBStartIdx(&MBB);
++ }
+
+ // Create the interval for the basic blocks containing lifetime begin/end.
+ for (const MachineInstr &MI : MBB) {
+@@ -696,68 +829,35 @@
+ SlotIndex ThisIndex = Indexes->getInstructionIndex(MI);
+ for (auto Slot : slots) {
+ if (IsStart) {
+- if (!Starts[Slot].isValid() || Starts[Slot] > ThisIndex)
++ // If a slot is already definitely in use, we don't have to emit
++ // a new start marker because there is already a pre-existing
++ // one.
++ if (!DefinitelyInUse[Slot]) {
++ LiveStarts[Slot].push_back(ThisIndex);
++ DefinitelyInUse[Slot] = true;
++ }
++ if (!Starts[Slot].isValid())
+ Starts[Slot] = ThisIndex;
+ } else {
+- if (!Finishes[Slot].isValid() || Finishes[Slot] < ThisIndex)
+- Finishes[Slot] = ThisIndex;
++ if (Starts[Slot].isValid()) {
++ VNInfo *VNI = Intervals[Slot]->getValNumInfo(0);
++ Intervals[Slot]->addSegment(
++ LiveInterval::Segment(Starts[Slot], ThisIndex, VNI));
++ Starts[Slot] = SlotIndex(); // Invalidate the start index
++ DefinitelyInUse[Slot] = false;
++ }
+ }
+ }
+ }
+
+- // Create the interval of the blocks that we previously found to be 'alive'.
+- BlockLifetimeInfo &MBBLiveness = BlockLiveness[&MBB];
+- for (int pos = MBBLiveness.LiveIn.find_first(); pos != -1;
+- pos = MBBLiveness.LiveIn.find_next(pos)) {
+- Starts[pos] = Indexes->getMBBStartIdx(&MBB);
+- }
+- for (int pos = MBBLiveness.LiveOut.find_first(); pos != -1;
+- pos = MBBLiveness.LiveOut.find_next(pos)) {
+- Finishes[pos] = Indexes->getMBBEndIdx(&MBB);
+- }
+-
++ // Finish up started segments
+ for (unsigned i = 0; i < NumSlots; ++i) {
+- //
+- // When LifetimeStartOnFirstUse is turned on, data flow analysis
+- // is forward (from starts to ends), not bidirectional. A
+- // consequence of this is that we can wind up in situations
+- // where Starts[i] is invalid but Finishes[i] is valid and vice
+- // versa. Example:
+- //
+- // LIFETIME_START x
+- // if (...) {
+- // <use of x>
+- // throw ...;
+- // }
+- // LIFETIME_END x
+- // return 2;
+- //
+- //
+- // Here the slot for "x" will not be live into the block
+- // containing the "return 2" (since lifetimes start with first
+- // use, not at the dominating LIFETIME_START marker).
+- //
+- if (Starts[i].isValid() && !Finishes[i].isValid()) {
+- Finishes[i] = Indexes->getMBBEndIdx(&MBB);
+- }
+ if (!Starts[i].isValid())
+ continue;
+
+- assert(Starts[i] && Finishes[i] && "Invalid interval");
+- VNInfo *ValNum = Intervals[i]->getValNumInfo(0);
+- SlotIndex S = Starts[i];
+- SlotIndex F = Finishes[i];
+- if (S < F) {
+- // We have a single consecutive region.
+- Intervals[i]->addSegment(LiveInterval::Segment(S, F, ValNum));
+- } else {
+- // We have two non-consecutive regions. This happens when
+- // LIFETIME_START appears after the LIFETIME_END marker.
+- SlotIndex NewStart = Indexes->getMBBStartIdx(&MBB);
+- SlotIndex NewFin = Indexes->getMBBEndIdx(&MBB);
+- Intervals[i]->addSegment(LiveInterval::Segment(NewStart, F, ValNum));
+- Intervals[i]->addSegment(LiveInterval::Segment(S, NewFin, ValNum));
+- }
++ SlotIndex EndIdx = Indexes->getMBBEndIdx(&MBB);
++ VNInfo *VNI = Intervals[i]->getValNumInfo(0);
++ Intervals[i]->addSegment(LiveInterval::Segment(Starts[i], EndIdx, VNI));
+ }
+ }
+ }
+@@ -987,6 +1087,7 @@
+ BasicBlockNumbering.clear();
+ Markers.clear();
+ Intervals.clear();
++ LiveStarts.clear();
+ VNInfoAllocator.Reset();
+
+ unsigned NumSlots = MFI->getObjectIndexEnd();
+@@ -998,6 +1099,7 @@
+ SmallVector<int, 8> SortedSlots;
+ SortedSlots.reserve(NumSlots);
+ Intervals.reserve(NumSlots);
++ LiveStarts.resize(NumSlots);
+
+ unsigned NumMarkers = collectMarkers(NumSlots);
+
+@@ -1069,6 +1171,9 @@
+ return MFI->getObjectSize(LHS) > MFI->getObjectSize(RHS);
+ });
+
++ for (auto &s : LiveStarts)
++ std::sort(s.begin(), s.end());
++
+ bool Changed = true;
+ while (Changed) {
+ Changed = false;
+@@ -1084,12 +1189,22 @@
+ int SecondSlot = SortedSlots[J];
+ LiveInterval *First = &*Intervals[FirstSlot];
+ LiveInterval *Second = &*Intervals[SecondSlot];
++ auto &FirstS = LiveStarts[FirstSlot];
++ auto &SecondS = LiveStarts[SecondSlot];
+ assert (!First->empty() && !Second->empty() && "Found an empty range");
+
+- // Merge disjoint slots.
+- if (!First->overlaps(*Second)) {
++ // Merge disjoint slots. This is a little bit tricky - see the
++ // Implementation Notes section for an explanation.
++ if (!First->isLiveAtIndexes(SecondS) &&
++ !Second->isLiveAtIndexes(FirstS)) {
+ Changed = true;
+ First->MergeSegmentsInAsValue(*Second, First->getValNumInfo(0));
++
++ int OldSize = FirstS.size();
++ FirstS.append(SecondS.begin(), SecondS.end());
++ auto Mid = FirstS.begin() + OldSize;
++ std::inplace_merge(FirstS.begin(), Mid, FirstS.end());
++
+ SlotRemap[SecondSlot] = FirstSlot;
+ SortedSlots[J] = -1;
+ DEBUG(dbgs()<<"Merging #"<<FirstSlot<<" and slots #"<<
+--- a/test/CodeGen/X86/StackColoring.ll
++++ b/test/CodeGen/X86/StackColoring.ll
+@@ -582,12 +582,76 @@
+ ret i32 %x.addr.0
+ }
+
++;CHECK-LABEL: multi_segment:
++;YESCOLOR: subq $256, %rsp
++;NOFIRSTUSE: subq $256, %rsp
++;NOCOLOR: subq $512, %rsp
++define i1 @multi_segment(i1, i1)
++{
++entry-block:
++ %foo = alloca [32 x i64]
++ %bar = alloca [32 x i64]
++ %foo_i8 = bitcast [32 x i64]* %foo to i8*
++ %bar_i8 = bitcast [32 x i64]* %bar to i8*
++ call void @llvm.lifetime.start.p0i8(i64 256, i8* %bar_i8)
++ call void @baz([32 x i64]* %bar, i32 1)
++ call void @llvm.lifetime.end.p0i8(i64 256, i8* %bar_i8)
++ call void @llvm.lifetime.start.p0i8(i64 256, i8* %foo_i8)
++ call void @baz([32 x i64]* %foo, i32 1)
++ call void @llvm.lifetime.end.p0i8(i64 256, i8* %foo_i8)
++ call void @llvm.lifetime.start.p0i8(i64 256, i8* %bar_i8)
++ call void @baz([32 x i64]* %bar, i32 1)
++ call void @llvm.lifetime.end.p0i8(i64 256, i8* %bar_i8)
++ ret i1 true
++}
++
++;CHECK-LABEL: pr32488:
++;YESCOLOR: subq $256, %rsp
++;NOFIRSTUSE: subq $256, %rsp
++;NOCOLOR: subq $512, %rsp
++define i1 @pr32488(i1, i1)
++{
++entry-block:
++ %foo = alloca [32 x i64]
++ %bar = alloca [32 x i64]
++ %foo_i8 = bitcast [32 x i64]* %foo to i8*
++ %bar_i8 = bitcast [32 x i64]* %bar to i8*
++ br i1 %0, label %if_false, label %if_true
++if_false:
++ call void @llvm.lifetime.start.p0i8(i64 256, i8* %bar_i8)
++ call void @baz([32 x i64]* %bar, i32 0)
++ br i1 %1, label %if_false.1, label %onerr
++if_false.1:
++ call void @llvm.lifetime.end.p0i8(i64 256, i8* %bar_i8)
++ br label %merge
++if_true:
++ call void @llvm.lifetime.start.p0i8(i64 256, i8* %foo_i8)
++ call void @baz([32 x i64]* %foo, i32 1)
++ br i1 %1, label %if_true.1, label %onerr
++if_true.1:
++ call void @llvm.lifetime.end.p0i8(i64 256, i8* %foo_i8)
++ br label %merge
++merge:
++ ret i1 false
++onerr:
++ call void @llvm.lifetime.end.p0i8(i64 256, i8* %foo_i8)
++ call void @llvm.lifetime.end.p0i8(i64 256, i8* %bar_i8)
++ call void @destructor()
++ ret i1 true
++}
++
++%Data = type { [32 x i64] }
++
++declare void @destructor()
++
+ declare void @inita(i32*)
+
+ declare void @initb(i32*,i32*,i32*)
+
+ declare void @bar([100 x i32]* , [100 x i32]*) nounwind
+
++declare void @baz([32 x i64]*, i32)
++
+ declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind
+
+ declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind
diff -Nru llvm-toolchain-4.0-4.0.1/debian/patches/rL306267.diff llvm-toolchain-4.0-4.0.1/debian/patches/rL306267.diff
--- llvm-toolchain-4.0-4.0.1/debian/patches/rL306267.diff 1970-01-01 01:00:00.000000000 +0100
+++ llvm-toolchain-4.0-4.0.1/debian/patches/rL306267.diff 2017-10-18 15:28:20.000000000 +0200
@@ -0,0 +1,187 @@
+commit 5a057dc8edbb63887f8c611dd8ddf1b76997f07c
+Author: Chandler Carruth <chandlerc at gmail.com>
+Date: Mon Jun 26 03:31:31 2017 +0000
+
+ [InstCombine] Factor the logic for propagating !nonnull and !range
+ metadata out of InstCombine and into helpers.
+
+ NFC, this just exposes the logic used by InstCombine when propagating
+ metadata from one load instruction to another. The plan is to use this
+ in SROA to address PR32902.
+
+ If anyone has better ideas about how to factor this or name variables,
+ I'm all ears, but this seemed like a pretty good start and lets us make
+ progress on the PR.
+
+ This is based on a patch by Ariel Ben-Yehuda (D34285).
+
+ git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@306267 91177308-0d34-0410-b5e6-96231b3b80d8
+
+--- a/include/llvm/Transforms/Utils/Local.h
++++ b/include/llvm/Transforms/Utils/Local.h
+@@ -366,6 +366,19 @@
+ /// during lowering by the GC infrastructure.
+ bool callsGCLeafFunction(ImmutableCallSite CS);
+
++/// Copy a nonnull metadata node to a new load instruction.
++///
++/// This handles mapping it to range metadata if the new load is an integer
++/// load instead of a pointer load.
++void copyNonnullMetadata(const LoadInst &OldLI, MDNode *N, LoadInst &NewLI);
++
++/// Copy a range metadata node to a new load instruction.
++///
++/// This handles mapping it to nonnull metadata if the new load is a pointer
++/// load instead of an integer load and the range doesn't cover null.
++void copyRangeMetadata(const DataLayout &DL, const LoadInst &OldLI, MDNode *N,
++ LoadInst &NewLI);
++
+ //===----------------------------------------------------------------------===//
+ // Intrinsic pattern matching
+ //
+--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
++++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+@@ -471,21 +471,7 @@
+ break;
+
+ case LLVMContext::MD_nonnull:
+- // This only directly applies if the new type is also a pointer.
+- if (NewTy->isPointerTy()) {
+- NewLoad->setMetadata(ID, N);
+- break;
+- }
+- // If it's integral now, translate it to !range metadata.
+- if (NewTy->isIntegerTy()) {
+- auto *ITy = cast<IntegerType>(NewTy);
+- auto *NullInt = ConstantExpr::getPtrToInt(
+- ConstantPointerNull::get(cast<PointerType>(Ptr->getType())), ITy);
+- auto *NonNullInt =
+- ConstantExpr::getAdd(NullInt, ConstantInt::get(ITy, 1));
+- NewLoad->setMetadata(LLVMContext::MD_range,
+- MDB.createRange(NonNullInt, NullInt));
+- }
++ copyNonnullMetadata(LI, N, *NewLoad);
+ break;
+ case LLVMContext::MD_align:
+ case LLVMContext::MD_dereferenceable:
+@@ -495,17 +481,7 @@
+ NewLoad->setMetadata(ID, N);
+ break;
+ case LLVMContext::MD_range:
+- // FIXME: It would be nice to propagate this in some way, but the type
+- // conversions make it hard.
+-
+- // If it's a pointer now and the range does not contain 0, make it !nonnull.
+- if (NewTy->isPointerTy()) {
+- unsigned BitWidth = IC.getDataLayout().getTypeSizeInBits(NewTy);
+- if (!getConstantRangeFromMetadata(*N).contains(APInt(BitWidth, 0))) {
+- MDNode *NN = MDNode::get(LI.getContext(), None);
+- NewLoad->setMetadata(LLVMContext::MD_nonnull, NN);
+- }
+- }
++ copyRangeMetadata(IC.getDataLayout(), LI, N, *NewLoad);
+ break;
+ }
+ }
+--- a/lib/Transforms/Utils/Local.cpp
++++ b/lib/Transforms/Utils/Local.cpp
+@@ -26,6 +26,7 @@
+ #include "llvm/Analysis/LazyValueInfo.h"
+ #include "llvm/Analysis/ValueTracking.h"
+ #include "llvm/IR/CFG.h"
++#include "llvm/IR/ConstantRange.h"
+ #include "llvm/IR/Constants.h"
+ #include "llvm/IR/DIBuilder.h"
+ #include "llvm/IR/DataLayout.h"
+@@ -1069,7 +1070,7 @@
+ }
+
+ /// See if there is a dbg.value intrinsic for DIVar for the PHI node.
+-static bool PhiHasDebugValue(DILocalVariable *DIVar,
++static bool PhiHasDebugValue(DILocalVariable *DIVar,
+ DIExpression *DIExpr,
+ PHINode *APN) {
+ // Since we can't guarantee that the original dbg.declare instrinsic
+@@ -1152,7 +1153,7 @@
+ DbgValue->insertAfter(LI);
+ }
+
+-/// Inserts a llvm.dbg.value intrinsic after a phi
++/// Inserts a llvm.dbg.value intrinsic after a phi
+ /// that has an associated llvm.dbg.decl intrinsic.
+ void llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
+ PHINode *APN, DIBuilder &Builder) {
+@@ -1723,12 +1724,12 @@
+ // Preserve !invariant.group in K.
+ break;
+ case LLVMContext::MD_align:
+- K->setMetadata(Kind,
++ K->setMetadata(Kind,
+ MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD));
+ break;
+ case LLVMContext::MD_dereferenceable:
+ case LLVMContext::MD_dereferenceable_or_null:
+- K->setMetadata(Kind,
++ K->setMetadata(Kind,
+ MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD));
+ break;
+ }
+@@ -1812,6 +1813,49 @@
+ return false;
+ }
+
++void llvm::copyNonnullMetadata(const LoadInst &OldLI, MDNode *N,
++ LoadInst &NewLI) {
++ auto *NewTy = NewLI.getType();
++
++ // This only directly applies if the new type is also a pointer.
++ if (NewTy->isPointerTy()) {
++ NewLI.setMetadata(LLVMContext::MD_nonnull, N);
++ return;
++ }
++
++ // The only other translation we can do is to integral loads with !range
++ // metadata.
++ if (!NewTy->isIntegerTy())
++ return;
++
++ MDBuilder MDB(NewLI.getContext());
++ const Value *Ptr = OldLI.getPointerOperand();
++ auto *ITy = cast<IntegerType>(NewTy);
++ auto *NullInt = ConstantExpr::getPtrToInt(
++ ConstantPointerNull::get(cast<PointerType>(Ptr->getType())), ITy);
++ auto *NonNullInt = ConstantExpr::getAdd(NullInt, ConstantInt::get(ITy, 1));
++ NewLI.setMetadata(LLVMContext::MD_range,
++ MDB.createRange(NonNullInt, NullInt));
++}
++
++void llvm::copyRangeMetadata(const DataLayout &DL, const LoadInst &OldLI,
++ MDNode *N, LoadInst &NewLI) {
++ auto *NewTy = NewLI.getType();
++
++ // Give up unless it is converted to a pointer where there is a single very
++ // valuable mapping we can do reliably.
++ // FIXME: It would be nice to propagate this in more ways, but the type
++ // conversions make it hard.
++ if (!NewTy->isPointerTy())
++ return;
++
++ unsigned BitWidth = DL.getTypeSizeInBits(NewTy);
++ if (!getConstantRangeFromMetadata(*N).contains(APInt(BitWidth, 0))) {
++ MDNode *NN = MDNode::get(OldLI.getContext(), None);
++ NewLI.setMetadata(LLVMContext::MD_nonnull, NN);
++ }
++}
++
+ namespace {
+ /// A potential constituent of a bitreverse or bswap expression. See
+ /// collectBitParts for a fuller explanation.
+@@ -1933,7 +1977,7 @@
+ unsigned NumMaskedBits = AndMask.countPopulation();
+ if (!MatchBitReversals && NumMaskedBits % 8 != 0)
+ return Result;
+-
++
+ auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps,
+ MatchBitReversals, BPS);
+ if (!Res)
diff -Nru llvm-toolchain-4.0-4.0.1/debian/patches/rL306353.diff llvm-toolchain-4.0-4.0.1/debian/patches/rL306353.diff
--- llvm-toolchain-4.0-4.0.1/debian/patches/rL306353.diff 1970-01-01 01:00:00.000000000 +0100
+++ llvm-toolchain-4.0-4.0.1/debian/patches/rL306353.diff 2017-10-18 15:28:20.000000000 +0200
@@ -0,0 +1,47 @@
+commit 477bd758b48cb96477d3dd4cf3b36bf2706e8c1d
+Author: Chandler Carruth <chandlerc at gmail.com>
+Date: Tue Jun 27 02:23:15 2017 +0000
+
+ [SROA] Clean up a test case a bit prior to adding more testing for
+ nonnull as part of fixing PR32902.
+
+ git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@306353 91177308-0d34-0410-b5e6-96231b3b80d8
+
+--- a/test/Transforms/SROA/preserve-nonnull.ll
++++ b/test/Transforms/SROA/preserve-nonnull.ll
+@@ -3,22 +3,20 @@
+ ; Make sure that SROA doesn't lose nonnull metadata
+ ; on loads from allocas that get optimized out.
+
+-; CHECK-LABEL: define float* @yummy_nonnull
+-; CHECK: [[RETURN:%(.*)]] = load float*, float** %arg, align 8
+-; CHECK: [[ASSUME:%(.*)]] = icmp ne float* {{.*}}[[RETURN]], null
+-; CHECK: call void @llvm.assume(i1 {{.*}}[[ASSUME]])
+-; CHECK: ret float* {{.*}}[[RETURN]]
+-
+ define float* @yummy_nonnull(float** %arg) {
+-entry-block:
+- %buf = alloca float*
+-
+- %_arg_i8 = bitcast float** %arg to i8*
+- %_buf_i8 = bitcast float** %buf to i8*
+- call void @llvm.memcpy.p0i8.p0i8.i64(i8* %_buf_i8, i8* %_arg_i8, i64 8, i32 8, i1 false)
+-
+- %ret = load float*, float** %buf, align 8, !nonnull !0
+- ret float* %ret
++; CHECK-LABEL: define float* @yummy_nonnull(
++; CHECK-NEXT: entry:
++; CHECK-NEXT: %[[RETURN:.*]] = load float*, float** %arg, align 8
++; CHECK-NEXT: %[[ASSUME:.*]] = icmp ne float* %[[RETURN]], null
++; CHECK-NEXT: call void @llvm.assume(i1 %[[ASSUME]])
++; CHECK-NEXT: ret float* %[[RETURN]]
++entry:
++ %buf = alloca float*
++ %_arg_i8 = bitcast float** %arg to i8*
++ %_buf_i8 = bitcast float** %buf to i8*
++ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %_buf_i8, i8* %_arg_i8, i64 8, i32 8, i1 false)
++ %ret = load float*, float** %buf, align 8, !nonnull !0
++ ret float* %ret
+ }
+
+ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)
diff -Nru llvm-toolchain-4.0-4.0.1/debian/patches/rL306358.diff llvm-toolchain-4.0-4.0.1/debian/patches/rL306358.diff
--- llvm-toolchain-4.0-4.0.1/debian/patches/rL306358.diff 1970-01-01 01:00:00.000000000 +0100
+++ llvm-toolchain-4.0-4.0.1/debian/patches/rL306358.diff 2017-10-18 15:28:20.000000000 +0200
@@ -0,0 +1,52 @@
+commit 156cc49e505986a1659adaa3a0b5a070372377c8
+Author: Chandler Carruth <chandlerc at gmail.com>
+Date: Tue Jun 27 03:08:45 2017 +0000
+
+ [SROA] Further test cleanup and add a test for the actual propagation of
+ the nonnull attribute distinct from rewriting it into an assume.
+
+ git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@306358 91177308-0d34-0410-b5e6-96231b3b80d8
+
+--- a/test/Transforms/SROA/preserve-nonnull.ll
++++ b/test/Transforms/SROA/preserve-nonnull.ll
+@@ -3,8 +3,31 @@
+ ; Make sure that SROA doesn't lose nonnull metadata
+ ; on loads from allocas that get optimized out.
+
+-define float* @yummy_nonnull(float** %arg) {
+-; CHECK-LABEL: define float* @yummy_nonnull(
++declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)
++
++; Check that we do basic propagation of nonnull when rewriting.
++define i8* @propagate_nonnull(i32* %v) {
++; CHECK-LABEL: define i8* @propagate_nonnull(
++; CHECK-NEXT: entry:
++; CHECK-NEXT: %[[A:.*]] = alloca i8*
++; CHECK-NEXT: %[[V_CAST:.*]] = bitcast i32* %v to i8*
++; CHECK-NEXT: store i8* %[[V_CAST]], i8** %[[A]]
++; CHECK-NEXT: %[[LOAD:.*]] = load volatile i8*, i8** %[[A]], !nonnull !0
++; CHECK-NEXT: ret i8* %[[LOAD]]
++entry:
++ %a = alloca [2 x i8*]
++ %a.gep0 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 0
++ %a.gep1 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 1
++ %a.gep0.cast = bitcast i8** %a.gep0 to i32**
++ %a.gep1.cast = bitcast i8** %a.gep1 to i32**
++ store i32* %v, i32** %a.gep1.cast
++ store i32* null, i32** %a.gep0.cast
++ %load = load volatile i8*, i8** %a.gep1, !nonnull !0
++ ret i8* %load
++}
++
++define float* @turn_nonnull_into_assume(float** %arg) {
++; CHECK-LABEL: define float* @turn_nonnull_into_assume(
+ ; CHECK-NEXT: entry:
+ ; CHECK-NEXT: %[[RETURN:.*]] = load float*, float** %arg, align 8
+ ; CHECK-NEXT: %[[ASSUME:.*]] = icmp ne float* %[[RETURN]], null
+@@ -19,6 +42,4 @@
+ ret float* %ret
+ }
+
+-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)
+-
+ !0 = !{}
diff -Nru llvm-toolchain-4.0-4.0.1/debian/patches/rL306379.diff llvm-toolchain-4.0-4.0.1/debian/patches/rL306379.diff
--- llvm-toolchain-4.0-4.0.1/debian/patches/rL306379.diff 1970-01-01 01:00:00.000000000 +0100
+++ llvm-toolchain-4.0-4.0.1/debian/patches/rL306379.diff 2017-10-18 15:28:20.000000000 +0200
@@ -0,0 +1,147 @@
+commit 7df06519765b14e1b08d7034c82c45a0a653eb25
+Author: Chandler Carruth <chandlerc at gmail.com>
+Date: Tue Jun 27 08:32:03 2017 +0000
+
+ [SROA] Fix PR32902 by more carefully propagating !nonnull metadata.
+
+ This is based heavily on the work done ni D34285. I mostly wanted to do
+ test cleanup for the author to save them some time, but I had a really
+ hard time understanding why it was so hard to write better test cases
+ for these issues.
+
+ The problem is that because SROA does a second rewrite of the loads and
+ because we *don't* propagate !nonnull for non-pointer loads, we first
+ introduced invalid !nonnull metadata and then stripped it back off just
+ in time to avoid most ways of this PR manifesting. Moving to the more
+ careful utility only fixes this by changing the predicate to look at the
+ new load's type rather than the target type. However, that *does* fix
+ the bug, and the utility is much nicer including adding range metadata
+ to model the nonnull property after a conversion to an integer.
+
+ However, we have bigger problems because we don't actually propagate
+ *range* metadata, and the utility to do this extracted from instcombine
+ isn't really in good shape to do this currently. It *only* handles the
+ case of copying range metadata from an integer load to a pointer load.
+ It doesn't even handle the trivial cases of propagating from one integer
+ load to another when they are the same width! This utility will need to
+ be beefed up prior to using in this location to get the metadata to
+ fully survive.
+
+ And even then, we need to go and teach things to turn the range metadata
+ into an assume the way we do with nonnull so that when we *promote* an
+ integer we don't lose the information.
+
+ All of this will require a new test case that looks kind-of like
+ `preserve-nonnull.ll` does here but focuses on range metadata. It will
+ also likely require more testing because it needs to correctly handle
+ changes to the integer width, especially as SROA actively tries to
+ change the integer width!
+
+ Last but not least, I'm a little worried about hooking the range
+ metadata up here because the instcombine logic for converting from
+ a range metadata *to* a nonnull metadata node seems broken in the face
+ of non-zero address spaces where null is not mapped to the integer `0`.
+ So that probably needs to get fixed with test cases both in SROA and in
+ instcombine to cover it.
+
+ But this *does* extract the core PR fix from D34285 of preventing the
+ !nonnull metadata from being propagated in a broken state just long
+ enough to feed into promotion and crash value tracking.
+
+ On D34285 there is some discussion of zero-extend handling because it
+ isn't necessary. First, the new load size covers all of the non-undef
+ (ie, possibly initialized) bits. This may even extend past the original
+ alloca if loading those bits could produce valid data. The only way its
+ valid for us to zero-extend an integer load in SROA is if the original
+ code had a zero extend or those bits were undef. And we get to assume
+ things like undef *never* satifies nonnull, so non undef bits can
+ participate here. No need to special case the zero-extend handling, it
+ just falls out correctly.
+
+ The original credit goes to Ariel Ben-Yehuda! I'm mostly landing this to
+ save a few rounds of trivial edits fixing style issues and test case
+ formulation.
+
+ Differental Revision: D34285
+
+ git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@306379 91177308-0d34-0410-b5e6-96231b3b80d8
+
+--- a/lib/Transforms/Scalar/SROA.cpp
++++ b/lib/Transforms/Scalar/SROA.cpp
+@@ -2388,9 +2388,20 @@
+ if (LI.isVolatile())
+ NewLI->setAtomic(LI.getOrdering(), LI.getSynchScope());
+
++ // Any !nonnull metadata or !range metadata on the old load is also valid
++ // on the new load. This is even true in some cases even when the loads
++ // are different types, for example by mapping !nonnull metadata to
++ // !range metadata by modeling the null pointer constant converted to the
++ // integer type.
++ // FIXME: Add support for range metadata here. Currently the utilities
++ // for this don't propagate range metadata in trivial cases from one
++ // integer load to another, don't handle non-addrspace-0 null pointers
++ // correctly, and don't have any support for mapping ranges as the
++ // integer type becomes winder or narrower.
++ if (MDNode *N = LI.getMetadata(LLVMContext::MD_nonnull))
++ copyNonnullMetadata(LI, N, *NewLI);
++
+ // Try to preserve nonnull metadata
+- if (TargetTy->isPointerTy())
+- NewLI->copyMetadata(LI, LLVMContext::MD_nonnull);
+ V = NewLI;
+
+ // If this is an integer load past the end of the slice (which means the
+--- a/test/Transforms/SROA/preserve-nonnull.ll
++++ b/test/Transforms/SROA/preserve-nonnull.ll
+@@ -42,4 +42,51 @@
+ ret float* %ret
+ }
+
++; Make sure we properly handle the !nonnull attribute when we convert
++; a pointer load to an integer load.
++; FIXME: While this doesn't do anythnig actively harmful today, it really
++; should propagate the !nonnull metadata to range metadata. The irony is, it
++; *does* initially, but then we lose that !range metadata before we finish
++; SROA.
++define i8* @propagate_nonnull_to_int() {
++; CHECK-LABEL: define i8* @propagate_nonnull_to_int(
++; CHECK-NEXT: entry:
++; CHECK-NEXT: %[[A:.*]] = alloca i64
++; CHECK-NEXT: store i64 42, i64* %[[A]]
++; CHECK-NEXT: %[[LOAD:.*]] = load volatile i64, i64* %[[A]]
++; CHECK-NEXT: %[[CAST:.*]] = inttoptr i64 %[[LOAD]] to i8*
++; CHECK-NEXT: ret i8* %[[CAST]]
++entry:
++ %a = alloca [2 x i8*]
++ %a.gep0 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 0
++ %a.gep1 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 1
++ %a.gep0.cast = bitcast i8** %a.gep0 to i64*
++ %a.gep1.cast = bitcast i8** %a.gep1 to i64*
++ store i64 42, i64* %a.gep1.cast
++ store i64 0, i64* %a.gep0.cast
++ %load = load volatile i8*, i8** %a.gep1, !nonnull !0
++ ret i8* %load
++}
++
++; Make sure we properly handle the !nonnull attribute when we convert
++; a pointer load to an integer load and immediately promote it to an SSA
++; register. This can fail in interesting ways due to the rewrite iteration of
++; SROA, resulting in PR32902.
++define i8* @propagate_nonnull_to_int_and_promote() {
++; CHECK-LABEL: define i8* @propagate_nonnull_to_int_and_promote(
++; CHECK-NEXT: entry:
++; CHECK-NEXT: %[[PROMOTED_VALUE:.*]] = inttoptr i64 42 to i8*
++; CHECK-NEXT: ret i8* %[[PROMOTED_VALUE]]
++entry:
++ %a = alloca [2 x i8*], align 8
++ %a.gep0 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 0
++ %a.gep1 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 1
++ %a.gep0.cast = bitcast i8** %a.gep0 to i64*
++ %a.gep1.cast = bitcast i8** %a.gep1 to i64*
++ store i64 42, i64* %a.gep1.cast
++ store i64 0, i64* %a.gep0.cast
++ %load = load i8*, i8** %a.gep1, align 8, !nonnull !0
++ ret i8* %load
++}
++
+ !0 = !{}
diff -Nru llvm-toolchain-4.0-4.0.1/debian/patches/series llvm-toolchain-4.0-4.0.1/debian/patches/series
--- llvm-toolchain-4.0-4.0.1/debian/patches/series 2017-09-18 21:34:54.000000000 +0200
+++ llvm-toolchain-4.0-4.0.1/debian/patches/series 2017-10-18 15:28:20.000000000 +0200
@@ -38,7 +38,6 @@
clang-tidy-run-bin.diff
#bug-30342.diff
fix-scan-view-path.diff
-#0011-SimplifyCFG-Hoisting-invalidates-metadata.patch
clang-fix-cmpxchg8-detection-on-i386.patch
lldb-addversion-suffix-to-llvm-server-exec.patch
lldb-missing-install.diff
@@ -49,3 +48,13 @@
ftfbs-gcc.diff
pr81066.diff
armhf-bitfield.diff
+# rust LLVM PR84, LLVM PR32488
+# This is actually Rust's backport of upstream RL305193 (which doesn't apply cleanly to LLVM 4)
+# https://github.com/rust-lang/llvm/commit/2b622a393ce
+rL305193-backport.diff
+# rust LLVM PR90, LLVM PR32902, PR31142
+rL298540.diff
+rL306267.diff
+rL306353.diff
+rL306358.diff
+rL306379.diff