diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 9f31b72bbceb1..f8b246a1a36cd 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -1847,9 +1847,19 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
   auto AddIntToBuffer = [AggBuffer, Bytes](const APInt &Val) {
     size_t NumBytes = (Val.getBitWidth() + 7) / 8;
     SmallVector<unsigned char, 16> Buf(NumBytes);
-    for (unsigned I = 0; I < NumBytes; ++I) {
+    // Val is expected to be at least one bit wide, so there is always a last
+    // byte; `NumBytes - 1` below must not wrap around.
+    assert(NumBytes > 0 && "expected a non-zero-width integer");
+    // `extractBitsAsZExtValue` cannot read past the value's bit width, and the
+    // width need not be a multiple of 8 (e.g. i1). Copy every full byte here
+    // and extract only the remaining bits for the last byte below.
+    for (unsigned I = 0; I < NumBytes - 1; ++I) {
       Buf[I] = Val.extractBitsAsZExtValue(8, I * 8);
     }
+    size_t LastBytePosition = (NumBytes - 1) * 8;
+    size_t LastByteBits = Val.getBitWidth() - LastBytePosition;
+    Buf[NumBytes - 1] =
+        Val.extractBitsAsZExtValue(LastByteBits, LastBytePosition);
     AggBuffer->addBytes(Buf.data(), NumBytes, Bytes);
   };
 
diff --git a/llvm/test/CodeGen/NVPTX/i1-array-global.ll b/llvm/test/CodeGen/NVPTX/i1-array-global.ll
new file mode 100644
index 0000000000000..a177f75a18dd6
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/i1-array-global.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
+
+target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
+target triple = "nvptx-nvidia-cuda"
+
+@global_cst = private constant [6 x i1] [i1 true, i1 false, i1 true, i1 false, i1 true, i1 false]
+
+; CHECK: .global .align 1 .b8 global_cst[6] = {1, 0, 1, 0, 1}
+define void @kernel(i32 %i, ptr %out) {
+  %5 = getelementptr inbounds i1, ptr @global_cst, i32 %i
+  %6 = load i1, ptr %5, align 1
+  store i1 %6, ptr %out, align 1
+  ret void
+}
+
+!nvvm.annotations = !{!0}
+!0 = !{ptr @kernel, !"kernel", i32 1}
+