diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp index a6be24601047f..cd4b9f557fb49 100644 --- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp +++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp @@ -3369,6 +3369,13 @@ void CallsiteContextGraph::identifyClones( if (hasSingleAllocType(Node->AllocTypes) || Node->CallerEdges.size() <= 1) break; + // If the caller was not successfully matched to a call in the IR/summary, + // there is no point in trying to clone for it as we can't update that call. + if (!CallerEdge->Caller->hasCall()) { + ++EI; + continue; + } + // Only need to process the ids along this edge pertaining to the given // allocation. auto CallerEdgeContextsForAlloc = diff --git a/llvm/test/ThinLTO/X86/memprof-missing-callsite.ll b/llvm/test/ThinLTO/X86/memprof-missing-callsite.ll new file mode 100644 index 0000000000000..3179df61a26eb --- /dev/null +++ b/llvm/test/ThinLTO/X86/memprof-missing-callsite.ll @@ -0,0 +1,70 @@ +;; Test callsite context graph generation for simple call graph with +;; two memprof contexts and no inlining, where one callsite required for +;; cloning is missing (e.g. unmatched). +;; +;; Original code looks like: +;; +;; char *foo() { +;; return new char[10]; +;; } +;; +;; int main(int argc, char **argv) { +;; char *x = foo(); +;; char *y = foo(); +;; memset(x, 0, 10); +;; memset(y, 0, 10); +;; delete[] x; +;; sleep(200); +;; delete[] y; +;; return 0; +;; } + +; RUN: opt -thinlto-bc -memprof-report-hinted-sizes %s >%t.o +; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ +; RUN: -supports-hot-cold-new \ +; RUN: -r=%t.o,main,plx \ +; RUN: -r=%t.o,_Znam, \ +; RUN: -memprof-report-hinted-sizes \ +; RUN: -pass-remarks=memprof-context-disambiguation -save-temps \ +; RUN: -o %t.out 2>&1 | FileCheck %s --implicit-check-not "call in clone _Z3foov" \ +; RUN: --check-prefix=SIZESUNHINTED +; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --implicit-check-not \"memprof\"=\"cold\" + +source_filename = "memprof-missing-callsite.ll" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @main() #0 { +entry: + ;; Missing callsite metadata blocks cloning + %call = call ptr @_Z3foov() + %call1 = call ptr @_Z3foov() + ret i32 0 +} + +define internal ptr @_Z3foov() #0 { +entry: + %call = call ptr @_Znam(i64 0), !memprof !2, !callsite !7 + ret ptr null +} + +declare ptr @_Znam(i64) + +; uselistorder directives +uselistorder ptr @_Z3foov, { 1, 0 } + +attributes #0 = { noinline optnone } + +!2 = !{!3, !5} +!3 = !{!4, !"notcold", !10} +!4 = !{i64 9086428284934609951, i64 8632435727821051414} +!5 = !{!6, !"cold", !11, !12} +!6 = !{i64 9086428284934609951, i64 -3421689549917153178} +!7 = !{i64 9086428284934609951} +!10 = !{i64 123, i64 100} +!11 = !{i64 456, i64 200} +!12 = !{i64 789, i64 300} + +; SIZESUNHINTED: NotCold full allocation context 123 with total size 100 is NotColdCold after cloning +; SIZESUNHINTED: Cold full allocation context 456 with total size 200 is NotColdCold after cloning +; SIZESUNHINTED: Cold full allocation context 789 with total size 300 is NotColdCold after cloning diff --git a/llvm/test/ThinLTO/X86/memprof-tailcall-nonunique.ll b/llvm/test/ThinLTO/X86/memprof-tailcall-nonunique.ll index 49c22bf590e6b..a6863cb878be0 100644 --- a/llvm/test/ThinLTO/X86/memprof-tailcall-nonunique.ll +++ b/llvm/test/ThinLTO/X86/memprof-tailcall-nonunique.ll @@ -20,7 +20,9 @@ ; RUN: -stats -debug -save-temps \ ; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=STATS --check-prefix=DEBUG -; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --check-prefix=IR +;; We should not see any type of cold attribute or cloning applied +; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --implicit-check-not cold \ +; RUN: --implicit-check-not ".memprof." ;; Try again but with distributed ThinLTO ; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ @@ -39,26 +41,23 @@ ; RUN: -o %t2.out 2>&1 | FileCheck %s --check-prefix=STATS --check-prefix=DEBUG ;; Run ThinLTO backend +;; We should not see any type of cold attribute or cloning applied ; RUN: opt -passes=memprof-context-disambiguation \ ; RUN: -memprof-import-summary=%t.o.thinlto.bc \ -; RUN: -stats %t.o -S 2>&1 | FileCheck %s --check-prefix=IR +; RUN: -stats %t.o -S 2>&1 | FileCheck %s --implicit-check-not cold \ +; RUN: --implicit-check-not ".memprof." ; DEBUG: Not found through unique tail call chain: 17377440600225628772 (_Z3barv) from 15822663052811949562 (main) that actually called 8716735811002003409 (xyz) (found multiple possible chains) ; STATS: 1 memprof-context-disambiguation - Number of profiled callees found via multiple tail call chains -;; Check that all calls in the IR are to the original functions, leading to a -;; non-cold operator new call. - source_filename = "tailcall-nonunique.cc" target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" ; Function Attrs: noinline -; IR-LABEL: @_Z3barv() define dso_local ptr @_Z3barv() local_unnamed_addr #0 { entry: - ; IR: call {{.*}} @_Znam(i64 10) #[[NOTCOLD:[0-9]+]] %call = tail call ptr @_Znam(i64 10) #2, !memprof !0, !callsite !9 ret ptr %call } @@ -67,54 +66,43 @@ entry: declare ptr @_Znam(i64) #1 ; Function Attrs: noinline -; IR-LABEL: @_Z5blah1v() define dso_local ptr @_Z5blah1v() local_unnamed_addr #0 { entry: - ; IR: call ptr @_Z3barv() %call = tail call ptr @_Z3barv() ret ptr %call } ; Function Attrs: noinline -; IR-LABEL: @_Z5blah2v() define dso_local ptr @_Z5blah2v() local_unnamed_addr #0 { entry: - ; IR: call ptr @_Z3barv() %call = tail call ptr @_Z3barv() ret ptr %call } ; Function Attrs: noinline -; IR-LABEL: @_Z4baz1v() define dso_local ptr @_Z4baz1v() local_unnamed_addr #0 { entry: - ; IR: call ptr @_Z5blah1v() %call = tail call ptr @_Z5blah1v() ret ptr %call } ; Function Attrs: noinline -; IR-LABEL: @_Z4baz2v() define dso_local ptr @_Z4baz2v() local_unnamed_addr #0 { entry: - ; IR: call ptr @_Z5blah2v() %call = tail call ptr @_Z5blah2v() ret ptr %call } ; Function Attrs: noinline -; IR-LABEL: @_Z3foob(i1 %b) define dso_local ptr @_Z3foob(i1 %b) local_unnamed_addr #0 { entry: br i1 %b, label %if.then, label %if.else if.then: ; preds = %entry - ; IR: call ptr @_Z4baz1v() %call = tail call ptr @_Z4baz1v() br label %return if.else: ; preds = %entry - ; IR: call ptr @_Z4baz2v() %call1 = tail call ptr @_Z4baz2v() br label %return @@ -124,29 +112,21 @@ return: ; preds = %if.else, %if.then } ; Function Attrs: noinline -; IR-LABEL: @xyz() define dso_local i32 @xyz() local_unnamed_addr #0 { delete.end13: - ; IR: call ptr @_Z3foob(i1 true) %call = tail call ptr @_Z3foob(i1 true) - ; IR: call ptr @_Z3foob(i1 true) %call1 = tail call ptr @_Z3foob(i1 true) - ; IR: call ptr @_Z3foob(i1 false) %call2 = tail call ptr @_Z3foob(i1 false) - ; IR: call ptr @_Z3foob(i1 false) %call3 = tail call ptr @_Z3foob(i1 false) ret i32 0 } define dso_local i32 @main() local_unnamed_addr #0 { delete.end13: - ; IR: call i32 @xyz() %call1 = tail call i32 @xyz(), !callsite !11 ret i32 0 } -; IR: attributes #[[NOTCOLD]] = { builtin allocsize(0) "memprof"="notcold" } - attributes #0 = { noinline } attributes #1 = { nobuiltin allocsize(0) } attributes #2 = { builtin allocsize(0) } diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/fix_clone_checking.ll b/llvm/test/Transforms/MemProfContextDisambiguation/fix_clone_checking.ll index 75cebae0b8297..bcb6ebc51d7bc 100644 --- a/llvm/test/Transforms/MemProfContextDisambiguation/fix_clone_checking.ll +++ b/llvm/test/Transforms/MemProfContextDisambiguation/fix_clone_checking.ll @@ -7,14 +7,8 @@ ; RUN: -memprof-verify-ccg -memprof-verify-nodes \ ; RUN: -pass-remarks=memprof-context-disambiguation %s -S 2>&1 | FileCheck %s -;; Make sure we created some clones -; CHECK: created clone A.memprof.1 -; CHECK: created clone C.memprof.1 -; CHECK: created clone D.memprof.1 -; CHECK: created clone E.memprof.1 -; CHECK: created clone B.memprof.1 -; CHECK: created clone F.memprof.1 -; CHECK: created clone G.memprof.1 +;; Make sure we successfully created at least one clone +; CHECK: created clone {{.*}}.memprof.1 ; ModuleID = '' source_filename = "reduced.ll"