diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 8d599c96eb4fb..94baa35e6a9ec 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -119,6 +119,8 @@ def err_drv_cuda_host_arch : Error< "unsupported architecture '%0' for host compilation">; def err_drv_mix_cuda_hip : Error< "mixed CUDA and HIP compilation is not supported">; +def err_drv_mix_offload : Error< + "mixed %0 and %1 offloading compilation is not supported">; def err_drv_bad_target_id : Error< "invalid target ID '%0'; format is a processor name followed by an optional " "colon-delimited list of features followed by an enable/disable sign (e.g., " diff --git a/clang/include/clang/Driver/Driver.h b/clang/include/clang/Driver/Driver.h index f4a52cc529b79..b463dc2a93550 100644 --- a/clang/include/clang/Driver/Driver.h +++ b/clang/include/clang/Driver/Driver.h @@ -797,22 +797,14 @@ class Driver { const ToolChain &getToolChain(const llvm::opt::ArgList &Args, const llvm::Triple &Target) const; - /// @} - - /// Retrieves a ToolChain for a particular device \p Target triple - /// - /// \param[in] HostTC is the host ToolChain paired with the device - /// - /// \param[in] TargetDeviceOffloadKind (e.g. OFK_Cuda/OFK_OpenMP/OFK_SYCL) is - /// an Offloading action that is optionally passed to a ToolChain (used by - /// CUDA, to specify if it's used in conjunction with OpenMP) + /// Retrieves a ToolChain for a particular \p Target triple for offloading. /// /// Will cache ToolChains for the life of the driver object, and create them /// on-demand. 
- const ToolChain &getOffloadingDeviceToolChain( - const llvm::opt::ArgList &Args, const llvm::Triple &Target, - const ToolChain &HostTC, - const Action::OffloadKind &TargetDeviceOffloadKind) const; + const ToolChain &getOffloadToolChain(const llvm::opt::ArgList &Args, + const Action::OffloadKind Kind, + const llvm::Triple &Target, + const llvm::Triple &AuxTarget) const; /// Get bitmasks for which option flags to include and exclude based on /// the driver mode. diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index d8123cc39fdc9..482d264ef37e2 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -932,7 +932,9 @@ def W_Joined : Joined<["-"], "W">, Group, def Xanalyzer : Separate<["-"], "Xanalyzer">, HelpText<"Pass to the static analyzer">, MetaVarName<"">, Group; -def Xarch__ : JoinedAndSeparate<["-"], "Xarch_">, Flags<[NoXarchOption]>; +def Xarch__ : JoinedAndSeparate<["-"], "Xarch_">, Flags<[NoXarchOption]>, + HelpText<"Pass to the compilation if the target matches ">, + MetaVarName<" ">; def Xarch_host : Separate<["-"], "Xarch_host">, Flags<[NoXarchOption]>, HelpText<"Pass to the CUDA/HIP host compilation">, MetaVarName<"">; def Xarch_device : Separate<["-"], "Xarch_device">, Flags<[NoXarchOption]>, @@ -1115,14 +1117,17 @@ def fno_convergent_functions : Flag<["-"], "fno-convergent-functions">, // Common offloading options let Group = offload_Group in { -def offload_arch_EQ : Joined<["--"], "offload-arch=">, Flags<[NoXarchOption]>, +def offload_targets_EQ : Joined<["--"], "offload-targets=">, + Visibility<[ClangOption, FlangOption]>, Flags<[NoXarchOption]>, + HelpText<"Specify a list of target architectures to use for offloading.">; + +def offload_arch_EQ : Joined<["--"], "offload-arch=">, Visibility<[ClangOption, FlangOption]>, HelpText<"Specify an offloading device architecture for CUDA, HIP, or OpenMP. (e.g. sm_35). 
" "If 'native' is used the compiler will detect locally installed architectures. " "For HIP offloading, the device architecture can be followed by target ID features " "delimited by a colon (e.g. gfx908:xnack+:sramecc-). May be specified more than once.">; def no_offload_arch_EQ : Joined<["--"], "no-offload-arch=">, - Flags<[NoXarchOption]>, Visibility<[ClangOption, FlangOption]>, HelpText<"Remove CUDA/HIP offloading device architecture (e.g. sm_35, gfx906) from the list of devices to compile for. " "'all' resets the list to its default value.">; diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 912777a9808b4..cb0f06b7ed463 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -879,34 +879,100 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, }) || C.getInputArgs().hasArg(options::OPT_hip_link) || C.getInputArgs().hasArg(options::OPT_hipstdpar); + bool IsOpenMP = + C.getInputArgs().hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ, + options::OPT_fno_openmp, false); + bool IsSYCL = C.getInputArgs().hasFlag(options::OPT_fsycl, + options::OPT_fno_sycl, false); + bool UseLLVMOffload = C.getInputArgs().hasArg( options::OPT_foffload_via_llvm, options::OPT_fno_offload_via_llvm, false); + + llvm::DenseSet Kinds; + const std::pair ActiveKinds[] = { + {IsCuda, Action::OFK_Cuda}, + {IsHIP, Action::OFK_HIP}, + {IsOpenMP, Action::OFK_OpenMP}, + {IsSYCL, Action::OFK_SYCL}}; + for (const auto &[Active, Kind] : ActiveKinds) + if (Active) + Kinds.insert(Kind); + + // Build an offloading toolchain for every requested target and kind. + for (StringRef Targets : + C.getInputArgs().getAllArgValues(options::OPT_offload_targets_EQ)) { + // We currently don't support any kind of mixed offloading. 
+ if (Kinds.size() > 1) { + Diag(clang::diag::err_drv_mix_offload) + << Action::GetOffloadKindName(*Kinds.begin()) + << Action::GetOffloadKindName(*(++Kinds.begin())); + return; + } + + // OpenMP offloading requires a compatible libomp. + if (Kinds.contains(Action::OFK_OpenMP)) { + OpenMPRuntimeKind RuntimeKind = getOpenMPRuntime(C.getInputArgs()); + if (RuntimeKind != OMPRT_OMP && RuntimeKind != OMPRT_IOMP5) { + Diag(clang::diag::err_drv_expecting_fopenmp_with_fopenmp_targets); + return; + } + } + + // Certain options are not allowed when combined with SYCL compilation. + if (Kinds.contains(Action::OFK_SYCL)) { + for (auto ID : + {options::OPT_static_libstdcxx, options::OPT_ffreestanding}) + if (Arg *IncompatArg = C.getInputArgs().getLastArg(ID)) + Diag(clang::diag::err_drv_argument_not_allowed_with) + << IncompatArg->getSpelling() << "-fsycl"; + } + + // Create a device toolchain for every specified triple. + for (StringRef Target : llvm::split(Targets, ",")) { + llvm::Triple TT(Target); + for (Action::OffloadKind Kind : Kinds) { + auto &TC = getOffloadToolChain(C.getInputArgs(), Kind, TT, + C.getDefaultToolChain().getTriple()); + + // Emit a warning if the detected CUDA version is too new. + if (Kind == Action::OFK_Cuda) { + auto &CudaInstallation = + static_cast(TC) + .CudaInstallation; + if (CudaInstallation.isValid()) + CudaInstallation.WarnIfUnsupportedVersion(); + } + + C.addOffloadDeviceToolChain(&TC, Kind); + } + } + } + + // If the user did not specify the toolchains specifically we will infer them + // based on the usage of `--offload-arch=`. 
+ if (C.getInputArgs().hasArg(options::OPT_offload_targets_EQ)) + return; + if (IsCuda && IsHIP) { Diag(clang::diag::err_drv_mix_cuda_hip); return; } if (IsCuda && !UseLLVMOffload) { - const ToolChain *HostTC = C.getSingleOffloadToolChain(); - const llvm::Triple &HostTriple = HostTC->getTriple(); - auto OFK = Action::OFK_Cuda; - auto CudaTriple = - getNVIDIAOffloadTargetTriple(*this, C.getInputArgs(), HostTriple); + auto CudaTriple = getNVIDIAOffloadTargetTriple( + *this, C.getInputArgs(), C.getDefaultToolChain().getTriple()); if (!CudaTriple) return; - // Use the CUDA and host triples as the key into the ToolChains map, - // because the device toolchain we create depends on both. - auto &CudaTC = ToolChains[CudaTriple->str() + "/" + HostTriple.str()]; - if (!CudaTC) { - CudaTC = std::make_unique( - *this, *CudaTriple, *HostTC, C.getInputArgs()); - - // Emit a warning if the detected CUDA version is too new. - CudaInstallationDetector &CudaInstallation = - static_cast(*CudaTC).CudaInstallation; - if (CudaInstallation.isValid()) - CudaInstallation.WarnIfUnsupportedVersion(); - } - C.addOffloadDeviceToolChain(CudaTC.get(), OFK); + + auto &TC = + getOffloadToolChain(C.getInputArgs(), Action::OFK_Cuda, *CudaTriple, + C.getDefaultToolChain().getTriple()); + + // Emit a warning if the detected CUDA version is too new. 
+ const CudaInstallationDetector &CudaInstallation = + static_cast(TC).CudaInstallation; + if (CudaInstallation.isValid()) + CudaInstallation.WarnIfUnsupportedVersion(); + C.addOffloadDeviceToolChain(&TC, Action::OFK_Cuda); } else if (IsHIP && !UseLLVMOffload) { if (auto *OMPTargetArg = C.getInputArgs().getLastArg(options::OPT_fopenmp_targets_EQ)) { @@ -914,14 +980,15 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, << OMPTargetArg->getSpelling() << "HIP"; return; } - const ToolChain *HostTC = C.getSingleOffloadToolChain(); - auto OFK = Action::OFK_HIP; + auto HIPTriple = getHIPOffloadTargetTriple(*this, C.getInputArgs()); if (!HIPTriple) return; - auto *HIPTC = &getOffloadingDeviceToolChain(C.getInputArgs(), *HIPTriple, - *HostTC, OFK); - C.addOffloadDeviceToolChain(HIPTC, OFK); + + auto &TC = + getOffloadToolChain(C.getInputArgs(), Action::OFK_HIP, *HIPTriple, + C.getDefaultToolChain().getTriple()); + C.addOffloadDeviceToolChain(&TC, Action::OFK_HIP); } if (IsCuda || IsHIP) @@ -934,10 +1001,8 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, // the -fopenmp-targets option or used --offload-arch with OpenMP enabled. bool IsOpenMPOffloading = ((IsCuda || IsHIP) && UseLLVMOffload) || - (C.getInputArgs().hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ, - options::OPT_fno_openmp, false) && - (C.getInputArgs().hasArg(options::OPT_fopenmp_targets_EQ) || - C.getInputArgs().hasArg(options::OPT_offload_arch_EQ))); + (IsOpenMP && (C.getInputArgs().hasArg(options::OPT_fopenmp_targets_EQ) || + C.getInputArgs().hasArg(options::OPT_offload_arch_EQ))); if (IsOpenMPOffloading) { // We expect that -fopenmp-targets is always used in conjunction with the // option -fopenmp specifying a valid runtime with offloading support, i.e. @@ -1038,50 +1103,23 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, FoundNormalizedTriples[NormalizedName] = Val; // If the specified target is invalid, emit a diagnostic. 
- if (TT.getArch() == llvm::Triple::UnknownArch) + if (TT.getArch() == llvm::Triple::UnknownArch) { Diag(clang::diag::err_drv_invalid_omp_target) << Val; - else { - const ToolChain *TC; - // Device toolchains have to be selected differently. They pair host - // and device in their implementation. - if (TT.isNVPTX() || TT.isAMDGCN() || TT.isSPIRV()) { - const ToolChain *HostTC = - C.getSingleOffloadToolChain(); - assert(HostTC && "Host toolchain should be always defined."); - auto &DeviceTC = - ToolChains[TT.str() + "/" + HostTC->getTriple().normalize()]; - if (!DeviceTC) { - if (TT.isNVPTX()) - DeviceTC = std::make_unique( - *this, TT, *HostTC, C.getInputArgs()); - else if (TT.isAMDGCN()) - DeviceTC = std::make_unique( - *this, TT, *HostTC, C.getInputArgs()); - else if (TT.isSPIRV()) - DeviceTC = std::make_unique( - *this, TT, *HostTC, C.getInputArgs()); - else - assert(DeviceTC && "Device toolchain not defined."); - } - - TC = DeviceTC.get(); - } else - TC = &getToolChain(C.getInputArgs(), TT); - C.addOffloadDeviceToolChain(TC, Action::OFK_OpenMP); - auto It = DerivedArchs.find(TT.getTriple()); - if (It != DerivedArchs.end()) - KnownArchs[TC] = It->second; + continue; } + + auto &TC = getOffloadToolChain(C.getInputArgs(), Action::OFK_OpenMP, TT, + C.getDefaultToolChain().getTriple()); + C.addOffloadDeviceToolChain(&TC, Action::OFK_OpenMP); + auto It = DerivedArchs.find(TT.getTriple()); + if (It != DerivedArchs.end()) + KnownArchs[&TC] = It->second; } } else if (C.getInputArgs().hasArg(options::OPT_fopenmp_targets_EQ)) { Diag(clang::diag::err_drv_expecting_fopenmp_with_fopenmp_targets); return; } - // We need to generate a SYCL toolchain if the user specified -fsycl. 
- bool IsSYCL = C.getInputArgs().hasFlag(options::OPT_fsycl, - options::OPT_fno_sycl, false); - auto argSYCLIncompatible = [&](OptSpecifier OptId) { if (!IsSYCL) return; @@ -1103,9 +1141,9 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, // getOffloadingDeviceToolChain, because the device toolchains we're // going to create will depend on both. const ToolChain *HostTC = C.getSingleOffloadToolChain(); - for (const auto &TargetTriple : UniqueSYCLTriplesVec) { - auto SYCLTC = &getOffloadingDeviceToolChain( - C.getInputArgs(), TargetTriple, *HostTC, Action::OFK_SYCL); + for (const auto &TT : UniqueSYCLTriplesVec) { + auto SYCLTC = &getOffloadToolChain(C.getInputArgs(), Action::OFK_SYCL, TT, + HostTC->getTriple()); C.addOffloadDeviceToolChain(SYCLTC, Action::OFK_SYCL); } } @@ -3409,7 +3447,8 @@ class OffloadingActionBuilder final { // Collect all offload arch parameters, removing duplicates. std::set GpuArchs; bool Error = false; - for (Arg *A : Args) { + const ToolChain &TC = *ToolChains.front(); + for (Arg *A : C.getArgsForToolChain(&TC, "", AssociatedOffloadKind)) { if (!(A->getOption().matches(options::OPT_offload_arch_EQ) || A->getOption().matches(options::OPT_no_offload_arch_EQ))) continue; @@ -3420,7 +3459,6 @@ class OffloadingActionBuilder final { ArchStr == "all") { GpuArchs.clear(); } else if (ArchStr == "native") { - const ToolChain &TC = *ToolChains.front(); auto GPUsOrErr = ToolChains.front()->getSystemGPUArchs(Args); if (!GPUsOrErr) { TC.getDriver().Diag(diag::err_drv_undetermined_gpu_arch) @@ -6604,6 +6642,72 @@ std::string Driver::GetClPchPath(Compilation &C, StringRef BaseName) const { return std::string(Output); } +const ToolChain &Driver::getOffloadToolChain( + const llvm::opt::ArgList &Args, const Action::OffloadKind Kind, + const llvm::Triple &Target, const llvm::Triple &AuxTarget) const { + auto &TC = ToolChains[Target.str() + "/" + AuxTarget.str()]; + auto &HostTC = ToolChains[AuxTarget.str()]; + + assert(HostTC && "Host toolchain 
for offloading doesn't exist?"); + if (!TC) { + // Detect the toolchain based off of the target operating system. + switch (Target.getOS()) { + case llvm::Triple::CUDA: + TC = std::make_unique(*this, Target, *HostTC, + Args); + break; + case llvm::Triple::AMDHSA: + if (Kind == Action::OFK_HIP) + TC = std::make_unique(*this, Target, + *HostTC, Args); + else if (Kind == Action::OFK_OpenMP) + TC = std::make_unique(*this, Target, + *HostTC, Args); + break; + default: + break; + } + } + if (!TC) { + // Detect the toolchain based off of the target architecture if that failed. + switch (Target.getArch()) { + case llvm::Triple::spir: + case llvm::Triple::spir64: + case llvm::Triple::spirv: + case llvm::Triple::spirv32: + case llvm::Triple::spirv64: + switch (Kind) { + case Action::OFK_SYCL: + TC = std::make_unique(*this, Target, *HostTC, + Args); + break; + case Action::OFK_HIP: + TC = std::make_unique(*this, Target, + *HostTC, Args); + break; + case Action::OFK_OpenMP: + TC = std::make_unique(*this, Target, + *HostTC, Args); + break; + case Action::OFK_Cuda: + TC = std::make_unique(*this, Target, *HostTC, + Args); + break; + default: + break; + } + break; + default: + break; + } + } + + // If all else fails, just look up the normal toolchain for the target. + if (!TC) + return getToolChain(Args, Target); + return *TC; +} + const ToolChain &Driver::getToolChain(const ArgList &Args, const llvm::Triple &Target) const { @@ -6797,45 +6901,6 @@ const ToolChain &Driver::getToolChain(const ArgList &Args, return *TC; } -const ToolChain &Driver::getOffloadingDeviceToolChain( - const ArgList &Args, const llvm::Triple &Target, const ToolChain &HostTC, - const Action::OffloadKind &TargetDeviceOffloadKind) const { - // Use device / host triples as the key into the ToolChains map because the - // device ToolChain we create depends on both. 
- auto &TC = ToolChains[Target.str() + "/" + HostTC.getTriple().str()]; - if (!TC) { - // Categorized by offload kind > arch rather than OS > arch like - // the normal getToolChain call, as it seems a reasonable way to categorize - // things. - switch (TargetDeviceOffloadKind) { - case Action::OFK_HIP: { - if (((Target.getArch() == llvm::Triple::amdgcn || - Target.getArch() == llvm::Triple::spirv64) && - Target.getVendor() == llvm::Triple::AMD && - Target.getOS() == llvm::Triple::AMDHSA) || - !Args.hasArgNoClaim(options::OPT_offload_EQ)) - TC = std::make_unique(*this, Target, - HostTC, Args); - else if (Target.getArch() == llvm::Triple::spirv64 && - Target.getVendor() == llvm::Triple::UnknownVendor && - Target.getOS() == llvm::Triple::UnknownOS) - TC = std::make_unique(*this, Target, - HostTC, Args); - break; - } - case Action::OFK_SYCL: - if (Target.isSPIROrSPIRV()) - TC = std::make_unique(*this, Target, HostTC, - Args); - break; - default: - break; - } - } - assert(TC && "Could not create offloading device tool chain."); - return *TC; -} - bool Driver::ShouldUseClangCompiler(const JobAction &JA) const { // Say "no" if there is not exactly one input of a type clang understands. 
if (JA.size() != 1 || diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index ebc982096595e..c25d1b6be14b5 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -1648,7 +1648,8 @@ void ToolChain::TranslateXarchArgs( A->getOption().matches(options::OPT_Xarch_host)) ValuePos = 0; - unsigned Index = Args.getBaseArgs().MakeIndex(A->getValue(ValuePos)); + const InputArgList &BaseArgs = Args.getBaseArgs(); + unsigned Index = BaseArgs.MakeIndex(A->getValue(ValuePos)); unsigned Prev = Index; std::unique_ptr XarchArg(Opts.ParseOneArg(Args, Index)); @@ -1672,8 +1673,31 @@ void ToolChain::TranslateXarchArgs( Diags.Report(DiagID) << A->getAsString(Args); return; } + XarchArg->setBaseArg(A); A = XarchArg.release(); + + // Linker input arguments require custom handling. The problem is that we + // have already constructed the phase actions, so we can not treat them as + // "input arguments". + if (A->getOption().hasFlag(options::LinkerInput)) { + // Convert the argument into individual Zlinker_input_args. Need to do this + // manually to avoid memory leaks with the allocated arguments. + for (const char *Value : A->getValues()) { + auto Opt = Opts.getOption(options::OPT_Zlinker_input); + unsigned Index = BaseArgs.MakeIndex(Opt.getName(), Value); + auto NewArg = + new Arg(Opt, BaseArgs.MakeArgString(Opt.getPrefix() + Opt.getName()), + Index, BaseArgs.getArgString(Index + 1), A); + + DAL->append(NewArg); + if (!AllocatedArgs) + DAL->AddSynthesizedArg(NewArg); + else + AllocatedArgs->push_back(NewArg); + } + } + if (!AllocatedArgs) DAL->AddSynthesizedArg(A); else @@ -1697,19 +1721,17 @@ llvm::opt::DerivedArgList *ToolChain::TranslateXarchArgs( } else if (A->getOption().matches(options::OPT_Xarch_host)) { NeedTrans = !IsDevice; Skip = IsDevice; - } else if (A->getOption().matches(options::OPT_Xarch__) && IsDevice) { - // Do not translate -Xarch_ options for non CUDA/HIP toolchain since - // they may need special translation. 
- // Skip this argument unless the architecture matches BoundArch - if (BoundArch.empty() || A->getValue(0) != BoundArch) - Skip = true; - else - NeedTrans = true; + } else if (A->getOption().matches(options::OPT_Xarch__)) { + NeedTrans = A->getValue() == getArchName() || + (!BoundArch.empty() && A->getValue() == BoundArch); + Skip = !NeedTrans; } if (NeedTrans || Skip) Modified = true; - if (NeedTrans) + if (NeedTrans) { + A->claim(); TranslateXarchArgs(Args, A, DAL, AllocatedArgs); + } if (!Skip) DAL->append(A); } diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp index c800e9cfa0a8d..c7d5893085080 100644 --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -123,7 +123,7 @@ CudaVersion parseCudaHFile(llvm::StringRef Input) { } } // namespace -void CudaInstallationDetector::WarnIfUnsupportedVersion() { +void CudaInstallationDetector::WarnIfUnsupportedVersion() const { if (Version > CudaVersion::PARTIALLY_SUPPORTED) { std::string VersionString = CudaVersionToString(Version); if (!VersionString.empty()) diff --git a/clang/lib/Driver/ToolChains/Cuda.h b/clang/lib/Driver/ToolChains/Cuda.h index 7a6a6fb209012..c2219ec47cfa9 100644 --- a/clang/lib/Driver/ToolChains/Cuda.h +++ b/clang/lib/Driver/ToolChains/Cuda.h @@ -74,7 +74,7 @@ class CudaInstallationDetector { std::string getLibDeviceFile(StringRef Gpu) const { return LibDeviceMap.lookup(Gpu); } - void WarnIfUnsupportedVersion(); + void WarnIfUnsupportedVersion() const; }; namespace tools { diff --git a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp index 9a276c55bf7bc..b26c5bf1a909e 100644 --- a/clang/lib/Driver/ToolChains/Darwin.cpp +++ b/clang/lib/Driver/ToolChains/Darwin.cpp @@ -2777,30 +2777,6 @@ DerivedArgList *MachO::TranslateArgs(const DerivedArgList &Args, // and try to push it down into tool specific logic. 
for (Arg *A : Args) { - if (A->getOption().matches(options::OPT_Xarch__)) { - // Skip this argument unless the architecture matches either the toolchain - // triple arch, or the arch being bound. - StringRef XarchArch = A->getValue(0); - if (!(XarchArch == getArchName() || - (!BoundArch.empty() && XarchArch == BoundArch))) - continue; - - Arg *OriginalArg = A; - TranslateXarchArgs(Args, A, DAL); - - // Linker input arguments require custom handling. The problem is that we - // have already constructed the phase actions, so we can not treat them as - // "input arguments". - if (A->getOption().hasFlag(options::LinkerInput)) { - // Convert the argument into individual Zlinker_input_args. - for (const char *Value : A->getValues()) { - DAL->AddSeparateArg( - OriginalArg, Opts.getOption(options::OPT_Zlinker_input), Value); - } - continue; - } - } - // Sob. These is strictly gcc compatible for the time being. Apple // gcc translates options twice, which means that self-expanding // options add duplicates. 
diff --git a/clang/test/Driver/Xarch.c b/clang/test/Driver/Xarch.c index f7693fb689d58..f35e2926f9c8d 100644 --- a/clang/test/Driver/Xarch.c +++ b/clang/test/Driver/Xarch.c @@ -1,8 +1,13 @@ // RUN: %clang -target i386-apple-darwin11 -m32 -Xarch_i386 -O3 %s -S -### 2>&1 | FileCheck -check-prefix=O3ONCE %s +// RUN: %clang -target x86_64-unknown-linux-gnu -Xarch_x86_64 -O3 %s -S -### 2>&1 | FileCheck -check-prefix=O3ONCE %s +// RUN: %clang -target x86_64-unknown-windows-msvc -Xarch_x86_64 -O3 %s -S -### 2>&1 | FileCheck -check-prefix=O3ONCE %s +// RUN: %clang -target aarch64-unknown-linux-gnu -Xarch_aarch64 -O3 %s -S -### 2>&1 | FileCheck -check-prefix=O3ONCE %s +// RUN: %clang -target powerpc64le-unknown-linux-gnu -Xarch_powerpc64le -O3 %s -S -### 2>&1 | FileCheck -check-prefix=O3ONCE %s // O3ONCE: "-O3" // O3ONCE-NOT: "-O3" // RUN: %clang -target i386-apple-darwin11 -m64 -Xarch_i386 -O3 %s -S -### 2>&1 | FileCheck -check-prefix=O3NONE %s +// RUN: %clang -target x86_64-unknown-linux-gnu -m64 -Xarch_i386 -O3 %s -S -### 2>&1 | FileCheck -check-prefix=O3NONE %s // O3NONE-NOT: "-O3" // O3NONE: argument unused during compilation: '-Xarch_i386 -O3' @@ -10,3 +15,6 @@ // INVALID: error: invalid Xarch argument: '-Xarch_i386 -o' // INVALID: error: invalid Xarch argument: '-Xarch_i386 -S' // INVALID: error: invalid Xarch argument: '-Xarch_i386 -o' + +// RUN: %clang -target x86_64-unknown-linux-gnu -Xarch_x86_64 -Wl,foo %s -### 2>&1 | FileCheck -check-prefix=LINKER %s +// LINKER: "foo" diff --git a/clang/test/Driver/offload-Xarch.c b/clang/test/Driver/offload-Xarch.c new file mode 100644 index 0000000000000..07b29b8e48841 --- /dev/null +++ b/clang/test/Driver/offload-Xarch.c @@ -0,0 +1,31 @@ +// RUN: %clang -x cuda %s -Xarch_nvptx64 -O3 -S -nogpulib -nogpuinc -### 2>&1 | FileCheck -check-prefix=O3ONCE %s +// RUN: %clang -x hip %s -Xarch_amdgcn -O3 -S -nogpulib -nogpuinc -### 2>&1 | FileCheck -check-prefix=O3ONCE %s +// RUN: %clang -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa 
-nogpulib -nogpuinc \ +// RUN: -Xarch_amdgcn -march=gfx90a -Xarch_amdgcn -O3 -S -### %s 2>&1 \ +// RUN: | FileCheck -check-prefix=O3ONCE %s +// RUN: %clang -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -nogpulib -nogpuinc \ +// RUN: -Xarch_nvptx64 -march=sm_52 -Xarch_nvptx64 -O3 -S -### %s 2>&1 \ +// RUN: | FileCheck -check-prefix=O3ONCE %s +// O3ONCE: "-O3" +// O3ONCE-NOT: "-O3" + +// RUN: %clang -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda,amdgcn-amd-amdhsa -nogpulib \ +// RUN: --target=x86_64-unknown-linux-gnu -Xarch_nvptx64 --offload-arch=sm_52,sm_60 -nogpuinc \ +// RUN: -Xarch_amdgcn --offload-arch=gfx90a,gfx1030 -ccc-print-bindings -### %s 2>&1 \ +// RUN: | FileCheck -check-prefix=OPENMP %s +// +// OPENMP: # "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[HOST_BC:.+]]" +// OPENMP: # "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[GFX1030_BC:.+]]" +// OPENMP: # "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[GFX90A_BC:.+]]" +// OPENMP: # "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[SM52_PTX:.+]]" +// OPENMP: # "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[SM52_PTX]]"], output: "[[SM52_CUBIN:.+]]" +// OPENMP: # "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[SM60_PTX:.+]]" +// OPENMP: # "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[SM60_PTX]]"], output: "[[SM60_CUBIN:.+]]" +// OPENMP: # "x86_64-unknown-linux-gnu" - "Offload::Packager", inputs: ["[[GFX1030_BC]]", "[[GFX90A_BC]]", "[[SM52_CUBIN]]", "[[SM60_CUBIN]]"], output: "[[BINARY:.+]]" +// OPENMP: # "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[BINARY]]"], output: "[[HOST_OBJ:.+]]" +// OPENMP: # "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out" + +// RUN: %clang -x cuda %s --offload-arch=sm_52,sm_60 -Xarch_sm_52 -O3 -Xarch_sm_60 -O0 \ +// RUN: -S -nogpulib 
-nogpuinc -### 2>&1 | FileCheck -check-prefix=CUDA %s +// CUDA: "-cc1" "-triple" "nvptx64-nvidia-cuda" {{.*}}"-target-cpu" "sm_52" {{.*}}"-O3" +// CUDA: "-cc1" "-triple" "nvptx64-nvidia-cuda" {{.*}}"-target-cpu" "sm_60" {{.*}}"-O0" diff --git a/clang/test/Driver/offload-target.c b/clang/test/Driver/offload-target.c new file mode 100644 index 0000000000000..f5d967401fe45 --- /dev/null +++ b/clang/test/Driver/offload-target.c @@ -0,0 +1,21 @@ +// RUN: %clang -### -fsycl --offload-targets=spirv64 -nogpuinc %s -ccc-print-bindings 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL +// SYCL: "spirv64" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[SYCL_BC:.+]]" + +// RUN: %clang -### --offload-targets=amdgcn-amd-amdhsa -nogpulib -nogpuinc -x hip %s -ccc-print-bindings 2>&1 \ +// RUN: | FileCheck %s -check-prefix=HIP +// HIP: "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[AMD_OBJ:.+]]" + +// RUN: %clang -### --offload-targets=nvptx64-nvidia-cuda -nogpulib -nogpuinc -x cuda %s -ccc-print-bindings 2>&1 \ +// RUN: | FileCheck %s -check-prefix=CUDA +// CUDA: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[NV_OBJ:.+]]" + +// RUN: %clang -### --offload-targets=amdgcn-amd-amdhsa,nvptx64-nvidia-cuda -fopenmp \ +// RUN: -nogpulib -nogpuinc %s -ccc-print-bindings 2>&1 \ +// RUN: | FileCheck %s -check-prefix=OPENMP +// OPENMP: "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[AMD_OBJ:.+]]" +// OPENMP: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]"], output: "[[NV_OBJ:.+]]" + +// RUN: %clang -### --offload-targets=spirv64-amd-amdhsa -nogpulib -nogpuinc -x hip %s -ccc-print-bindings 2>&1 \ +// RUN: | FileCheck %s -check-prefix=HIPSPIRV +// HIPSPIRV: "spirv64-amd-amdhsa" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[AMD_OBJ:.+]]"