diff --git a/clang/test/Driver/linker-wrapper-libs.c b/clang/test/Driver/linker-wrapper-libs.c index 22cc24f2e258a..cb5c7c137a0ba 100644 --- a/clang/test/Driver/linker-wrapper-libs.c +++ b/clang/test/Driver/linker-wrapper-libs.c @@ -48,7 +48,7 @@ int bar() { return weak; } // RUN: --linker-path=/usr/bin/ld %t.a %t.o -o a.out 2>&1 \ // RUN: | FileCheck %s --check-prefix=LIBRARY-RESOLVES -// LIBRARY-RESOLVES: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 {{.*}}.s {{.*}}.o +// LIBRARY-RESOLVES: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 {{.*}}.o {{.*}}.o // LIBRARY-RESOLVES: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 {{.*}}.o {{.*}}.o // @@ -72,7 +72,7 @@ int bar() { return weak; } // RUN: --linker-path=/usr/bin/ld %t.a %t.o -o a.out 2>&1 \ // RUN: | FileCheck %s --check-prefix=LIBRARY-GLOBAL -// LIBRARY-GLOBAL: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 {{.*}}.s {{.*}}.o +// LIBRARY-GLOBAL: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 {{.*}}.o {{.*}}.o // LIBRARY-GLOBAL: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 {{.*}}.o {{.*}}.o // @@ -96,7 +96,7 @@ int bar() { return weak; } // RUN: | FileCheck %s --check-prefix=LIBRARY-GLOBAL-NONE // LIBRARY-GLOBAL-NONE-NOT: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 {{.*}}.o {{.*}}.o -// LIBRARY-GLOBAL-NONE-NOT: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 {{.*}}.s {{.*}}.o +// LIBRARY-GLOBAL-NONE-NOT: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 {{.*}}.o {{.*}}.o // // Check that we do not extract an external weak symbol. @@ -161,7 +161,7 @@ int bar() { return weak; } // RUN: --linker-path=/usr/bin/ld %t.o %t.a %t.a -o a.out 2>&1 \ // RUN: | FileCheck %s --check-prefix=LIBRARY-GLOBAL-DEFINED -// LIBRARY-GLOBAL-DEFINED: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 {{.*}}.s {{.*}}.o +// LIBRARY-GLOBAL-DEFINED: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 {{.*}}.o {{.*}}.o // LIBRARY-GLOBAL-DEFINED-NOT: {{.*}}gfx1030{{.*}}.o // LIBRARY-GLOBAL-DEFINED: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 {{.*}}.o {{.*}}.o @@ -185,7 +185,7 @@ int bar() { return weak; } // RUN: --linker-path=/usr/bin/ld %t.o --whole-archive %t.a -o a.out 2>&1 \ // RUN: | FileCheck %s --check-prefix=LIBRARY-WHOLE-ARCHIVE -// LIBRARY-WHOLE-ARCHIVE: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 {{.*}}.s {{.*}}.o +// LIBRARY-WHOLE-ARCHIVE: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 {{.*}}.o {{.*}}.o // LIBRARY-WHOLE-ARCHIVE: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 {{.*}}.o {{.*}}.o -// LIBRARY-WHOLE-ARCHIVE: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_52 {{.*}}.s +// LIBRARY-WHOLE-ARCHIVE: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_52 {{.*}}.o // LIBRARY-WHOLE-ARCHIVE: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx90a {{.*}}.o diff --git a/clang/test/Driver/linker-wrapper.c b/clang/test/Driver/linker-wrapper.c index b9fa08ace0ff7..63d43921be9ac 100644 --- a/clang/test/Driver/linker-wrapper.c +++ b/clang/test/Driver/linker-wrapper.c @@ -48,7 +48,7 @@ __attribute__((visibility("protected"), used)) int x; // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run --save-temps -O2 \ // RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=AMDGPU-LTO-TEMPS -// AMDGPU-LTO-TEMPS: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -O2 -Wl,--no-undefined {{.*}}.s -save-temps +// AMDGPU-LTO-TEMPS: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -O2 -Wl,--no-undefined {{.*}}.o -save-temps // RUN: clang-offload-packager -o %t.out \ // RUN: --image=file=%t.elf.o,kind=openmp,triple=x86_64-unknown-linux-gnu \ @@ -147,7 +147,7 @@ __attribute__((visibility("protected"), used)) int x; // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run --clang-backend \ // RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=CLANG-BACKEND -// CLANG-BACKEND: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx908 -O2 -Wl,--no-undefined {{.*}}.bc +// CLANG-BACKEND: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx908 -O2 -Wl,--no-undefined {{.*}}.o // RUN: clang-offload-packager -o %t.out \ // RUN: --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp index 5edf4c982baa4..9a38e4fb098f9 100644 --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -293,6 +293,13 @@ Expected findProgram(StringRef Name, ArrayRef Paths) { return *Path; } +/// We will defer LTO to the target's linker if we are not doing JIT and it is +/// supported by the toolchain. +bool linkerSupportsLTO(const ArgList &Args) { + llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ)); + return Triple.isNVPTX() || Triple.isAMDGPU(); +} + /// Returns the hashed value for a constant string. std::string getHash(StringRef Str) { llvm::MD5 Hasher; @@ -555,11 +562,10 @@ Expected clang(ArrayRef InputFiles, const ArgList &Args) { llvm::copy(LinkerArgs, std::back_inserter(CmdArgs)); } - // Pass on -mllvm options to the clang invocation. - for (const opt::Arg *Arg : Args.filtered(OPT_mllvm)) { - CmdArgs.push_back("-mllvm"); - CmdArgs.push_back(Arg->getValue()); - } + // Pass on -mllvm options to the linker invocation. + for (const opt::Arg *Arg : Args.filtered(OPT_mllvm)) + CmdArgs.push_back( + Args.MakeArgString("-Wl,-mllvm=" + StringRef(Arg->getValue()))); if (Args.hasArg(OPT_debug)) CmdArgs.push_back("-g"); @@ -567,6 +573,12 @@ Expected clang(ArrayRef InputFiles, const ArgList &Args) { if (SaveTemps) CmdArgs.push_back("-save-temps"); + if (SaveTemps && linkerSupportsLTO(Args)) + CmdArgs.push_back("-Wl,--save-temps"); + + if (Args.hasArg(OPT_embed_bitcode)) + CmdArgs.push_back("-Wl,--lto-emit-llvm"); + if (Verbose) CmdArgs.push_back("-v"); @@ -587,8 +599,8 @@ Expected clang(ArrayRef InputFiles, const ArgList &Args) { Args.MakeArgString(Arg.split('=').second)}); } - // The OpenMPOpt pass can introduce new calls and is expensive, we do not want - // this when running CodeGen through clang. + // The OpenMPOpt pass can introduce new calls and is expensive, we do + // not want this when running CodeGen through clang. if (Args.hasArg(OPT_clang_backend) || Args.hasArg(OPT_builtin_bitcode_EQ)) CmdArgs.append({"-mllvm", "-openmp-opt-disable"}); @@ -772,8 +784,9 @@ Error linkBitcodeFiles(SmallVectorImpl &InputFiles, BumpPtrAllocator Alloc; StringSaver Saver(Alloc); - // Search for bitcode files in the input and create an LTO input file. If it - // is not a bitcode file, scan its symbol table for symbols we need to save. + // Search for bitcode files in the input and create an LTO input file. If + // it is not a bitcode file, scan its symbol table for symbols we need to + // save. for (OffloadFile &File : InputFiles) { MemoryBufferRef Buffer = MemoryBufferRef(File.getBinary()->getImage(), ""); @@ -807,7 +820,8 @@ Error linkBitcodeFiles(SmallVectorImpl &InputFiles, if (!Name) return Name.takeError(); - // Record if we've seen these symbols in any object or shared libraries. + // Record if we've seen these symbols in any object or shared + // libraries. if ((*ObjFile)->isRelocatableObject()) UsedInRegularObj.insert(Saver.save(*Name)); else @@ -844,7 +858,8 @@ Error linkBitcodeFiles(SmallVectorImpl &InputFiles, return false; }; - // We assume visibility of the whole program if every input file was bitcode. + // We assume visibility of the whole program if every input file was + // bitcode. auto Features = getTargetFeatures(BitcodeInputFiles); auto LTOBackend = Args.hasArg(OPT_embed_bitcode) || Args.hasArg(OPT_builtin_bitcode_EQ) || @@ -852,9 +867,9 @@ Error linkBitcodeFiles(SmallVectorImpl &InputFiles, ? createLTO(Args, Features, OutputBitcode) : createLTO(Args, Features); - // We need to resolve the symbols so the LTO backend knows which symbols need - // to be kept or can be internalized. This is a simplified symbol resolution - // scheme to approximate the full resolution a linker would do. + // We need to resolve the symbols so the LTO backend knows which symbols + // need to be kept or can be internalized. This is a simplified symbol + // resolution scheme to approximate the full resolution a linker would do. uint64_t Idx = 0; DenseSet PrevailingSymbols; for (auto &BitcodeInput : BitcodeInputFiles) { @@ -886,7 +901,8 @@ Error linkBitcodeFiles(SmallVectorImpl &InputFiles, // We need LTO to preseve the following global symbols: // 1) Symbols used in regular objects. // 2) Sections that will be given a __start/__stop symbol. - // 3) Prevailing symbols that are needed visible to external libraries. + // 3) Prevailing symbols that are needed visible to external + // libraries. Res.VisibleToRegularObj = UsedInRegularObj.contains(Sym.getName()) || isValidCIdentifier(Sym.getSectionName()) || @@ -901,9 +917,9 @@ Error linkBitcodeFiles(SmallVectorImpl &InputFiles, (UsedInSharedLib.contains(Sym.getName()) || !Sym.canBeOmittedFromSymbolTable()); - // The final definition will reside in this linkage unit if the symbol is - // defined and local to the module. This only checks for bitcode files, - // full assertion will require complete symbol resolution. + // The final definition will reside in this linkage unit if the symbol + // is defined and local to the module. This only checks for bitcode + // files, full assertion will require complete symbol resolution. Res.FinalDefinitionInLinkageUnit = Sym.getVisibility() != GlobalValue::DefaultVisibility && (!Sym.isUndefined() && !Sym.isCommon()); @@ -956,8 +972,8 @@ Error linkBitcodeFiles(SmallVectorImpl &InputFiles, return Error::success(); } - // Append the new inputs to the device linker input. If the user requested an - // internalizing link we need to pass the bitcode to clang. + // Append the new inputs to the device linker input. If the user requested + // an internalizing link we need to pass the bitcode to clang. for (StringRef File : Args.hasArg(OPT_clang_backend) || Args.hasArg(OPT_builtin_bitcode_EQ) ? BitcodeOutput @@ -972,10 +988,9 @@ Expected writeOffloadFile(const OffloadFile &File) { StringRef Prefix = sys::path::stem(Binary.getMemoryBufferRef().getBufferIdentifier()); - StringRef Suffix = getImageKindName(Binary.getImageKind()); auto TempFileOrErr = createOutputFile( - Prefix + "-" + Binary.getTriple() + "-" + Binary.getArch(), Suffix); + Prefix + "-" + Binary.getTriple() + "-" + Binary.getArch(), "o"); if (!TempFileOrErr) return TempFileOrErr.takeError(); @@ -1188,8 +1203,8 @@ DerivedArgList getLinkerArgs(ArrayRef Input, DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_triple_EQ), Args.MakeArgString(Input.front().getBinary()->getTriple())); - // If every input file is bitcode we have whole program visibility as we do - // only support static linking with bitcode. + // If every input file is bitcode we have whole program visibility as we + // do only support static linking with bitcode. auto ContainsBitcode = [](const OffloadFile &F) { return identify_magic(F.getBinary()->getImage()) == file_magic::bitcode; }; @@ -1277,12 +1292,15 @@ Expected> linkAndWrapDeviceFiles( if (File.getBinary()->getOffloadKind() != OFK_None) ActiveOffloadKinds.insert(File.getBinary()->getOffloadKind()); - // First link and remove all the input files containing bitcode. + // First link and remove all the input files containing bitcode if + // the target linker does not support it natively. SmallVector InputFiles; - if (Error Err = linkBitcodeFiles(Input, InputFiles, LinkerArgs)) - return Err; + if (!linkerSupportsLTO(LinkerArgs)) + if (Error Err = linkBitcodeFiles(Input, InputFiles, LinkerArgs)) + return Err; - // Write any remaining device inputs to an output file for the linker. + // Write any remaining device inputs to an output file for the + // linker. for (const OffloadFile &File : Input) { auto FileNameOrErr = writeOffloadFile(File); if (!FileNameOrErr) @@ -1291,9 +1309,10 @@ Expected> linkAndWrapDeviceFiles( } // Link the remaining device files using the device linker. - auto OutputOrErr = !Args.hasArg(OPT_embed_bitcode) - ? linkDevice(InputFiles, LinkerArgs) - : InputFiles.front(); + auto OutputOrErr = + !Args.hasArg(OPT_embed_bitcode) || linkerSupportsLTO(LinkerArgs) + ? linkDevice(InputFiles, LinkerArgs) + : InputFiles.front(); if (!OutputOrErr) return OutputOrErr.takeError(); @@ -1420,12 +1439,14 @@ Expected getSymbolsFromBitcode(MemoryBufferRef Buffer, OffloadKind Kind, bool NewSymbol = Syms.count(Sym.getName()) == 0; auto OldSym = NewSymbol ? Sym_None : Syms[Sym.getName()]; - // We will extract if it defines a currenlty undefined non-weak symbol. + // We will extract if it defines a currenlty undefined non-weak + // symbol. bool ResolvesStrongReference = ((OldSym & Sym_Undefined && !(OldSym & Sym_Weak)) && !Sym.isUndefined()); - // We will extract if it defines a new global symbol visible to the host. - // This is only necessary for code targeting an offloading language. + // We will extract if it defines a new global symbol visible to the + // host. This is only necessary for code targeting an offloading + // language. bool NewGlobalSymbol = ((NewSymbol || (OldSym & Sym_Undefined)) && !Sym.isUndefined() && !Sym.canBeOmittedFromSymbolTable() && Kind != object::OFK_None && @@ -1480,8 +1501,9 @@ Expected getSymbolsFromObject(const ObjectFile &Obj, OffloadKind Kind, !(OldSym & Sym_Weak) && !(*FlagsOrErr & SymbolRef::SF_Undefined); - // We will extract if it defines a new global symbol visible to the host. - // This is only necessary for code targeting an offloading language. + // We will extract if it defines a new global symbol visible to the + // host. This is only necessary for code targeting an offloading + // language. bool NewGlobalSymbol = ((NewSymbol || (OldSym & Sym_Undefined)) && !(*FlagsOrErr & SymbolRef::SF_Undefined) && Kind != object::OFK_None && @@ -1648,8 +1670,8 @@ getDeviceInput(const ArgList &Args) { Expected ExtractOrErr = getSymbols(Binary.getBinary()->getImage(), - Binary.getBinary()->getOffloadKind(), /*IsArchive=*/true, - Saver, Syms[ID]); + Binary.getBinary()->getOffloadKind(), + /*IsArchive=*/true, Saver, Syms[ID]); if (!ExtractOrErr) return ExtractOrErr.takeError();