intel · bader · Jun 11, 2020 · Jun 9, 2020 · Jun 10, 2020
@@ -64,6 +64,8 @@ NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple,
   VLASupported = false;
   AddrSpaceMap = &NVPTXAddrSpaceMap;
   UseAddrSpaceMapMangling = true;
+  HasLegalHalfType = true;
+  HasFloat16 = true;
 
   // Define available target features
   // These must be defined in sorted order!

@@ -1753,12 +1753,7 @@ void Sema::checkDeviceDecl(const ValueDecl *D, SourceLocation Loc) {
     if (Ty->isDependentType())
       return;
 
-    auto IsSYCLDeviceCuda = getLangOpts().SYCLIsDevice &&
-                            Context.getTargetInfo().getTriple().isNVPTX();
-    if ((Ty->isFloat16Type() && !Context.getTargetInfo().hasFloat16Type() &&
-         // Disable check for SYCL CUDA BE until FP16 support is properly
-         // reported there (issue#1799)
-         !IsSYCLDeviceCuda) ||
+    if ((Ty->isFloat16Type() && !Context.getTargetInfo().hasFloat16Type()) ||
         ((Ty->isFloat128Type() ||
           (Ty->isRealFloatingType() && Context.getTypeSize(Ty) == 128)) &&
          !Context.getTargetInfo().hasFloat128Type()) ||

@@ -1521,12 +1521,11 @@ static QualType ConvertDeclSpecToType(TypeProcessingState &state) {
       // CUDA host and device may have different _Float16 support, therefore
       // do not diagnose _Float16 usage to avoid false alarm.
       // ToDo: more precise diagnostics for CUDA.
-      auto IsSYCLDeviceCuda =
-        S.getLangOpts().SYCLIsDevice && S.Context.getTargetInfo().getTriple().isNVPTX();
-      if (!S.Context.getTargetInfo().hasFloat16Type() && !S.getLangOpts().CUDA &&
-          !(S.getLangOpts().OpenMP && S.getLangOpts().OpenMPIsDevice) && !IsSYCLDeviceCuda)
+      if (!S.Context.getTargetInfo().hasFloat16Type() &&
+          !S.getLangOpts().CUDA &&
+          !(S.getLangOpts().OpenMP && S.getLangOpts().OpenMPIsDevice))
         S.Diag(DS.getTypeSpecTypeLoc(), diag::err_type_unsupported)
-          << "_Float16";
+            << "_Float16";
     }
     Result = Context.Float16Ty;
     break;

@@ -0,0 +1,22 @@
+// REQUIRES: nvptx-registered-target
+// RUN: %clang_cc1 -triple nvptx-nvidia-cuda -target-cpu sm_20 -S -o - %s | FileCheck %s -check-prefix=NOF16
+// RUN: %clang_cc1 -triple nvptx-nvidia-cuda -target-cpu sm_60 -S -o - %s | FileCheck %s
+
+// CHECK: .target sm_60
+// NOF16: .target sm_20
+
+void f() { // Performs one _Float16 add, sub, mul and div; the directive comments before each statement give the PTX instruction expected from the sm_60 run and from the sm_20 run.
+  _Float16 x, y, z; // Declared without initializers; the directives below match instruction mnemonics only.
+  // CHECK: add.rn.f16
+  // NOF16: add.rn.f32
+  z = x + y;
+  // CHECK: sub.rn.f16
+  // NOF16: sub.rn.f32
+  z = x - y;
+  // CHECK: mul.rn.f16
+  // NOF16: mul.rn.f32
+  z = x * y;
+  // CHECK: div.rn.f32
+  // NOF16: div.rn.f32
+  z = x / y; // Unlike the three operations above, both runs expect the f32 form here.
+}