Skip to content

Commit b47f854

Browse files
committed
Add support for Radeon VII
1 parent fdb47ca commit b47f854

File tree

3 files changed

+10
-2
lines changed

3 files changed

+10
-2
lines changed

Makefile

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ endif
5959

6060
# AMD flags
6161
ROCM_PATH ?= /opt/rocm
62-
AMDGPU_TARGETS ?= $(shell $(ROCM_PATH)/llvm/bin/amdgpu-offload-arch -a)
62+
AMDGPU_TARGETS ?= $(shell $(ROCM_PATH)/llvm/bin/amdgpu-arch)
6363
HIPCC := $(shell which hipcc 2>/dev/null)
6464
HIPIFY := $(shell which hipify-perl 2>/dev/null)
6565
HIPCC_FLAGS = -O3 -march=native -I$(BUILD_DIR)/hip -fno-strict-aliasing
@@ -69,6 +69,10 @@ ifneq ($(filter gfx1100,$(AMDGPU_TARGETS)),)
6969
USE_HIPBLAS ?= 1
7070
USE_CK ?= 1
7171
AMDGPU_TARGETS := gfx1100
72+
else ifneq ($(filter gfx906,$(AMDGPU_TARGETS)),)
73+
WAVEFRONTSIZE64 ?= 1
74+
USE_HIPBLAS ?= 1
75+
AMDGPU_TARGETS := gfx906
7276
else ifneq ($(filter gfx90a,$(AMDGPU_TARGETS)),)
7377
WAVEFRONTSIZE64 ?= 1
7478
BUILD_XDL ?= 1

README.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
# llm.c for AMD devices
2-
This is a fork of [Andrej Karpathy's llm.c](https://github.com/karpathy/llm.c) with support for AMD's RDNA and CDNA devices.
2+
This is a fork of [Andrej Karpathy's llm.c](https://github.com/karpathy/llm.c) with support for AMD devices.
3+
4+
It has been tested on Radeon VII (aka gfx906), MI250X (aka gfx90a), and 7900 XTX (aka gfx1100).
35

46
## Performance
57

llmc/mfu.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ static const PerfData HOPPER = {378.f, 756.f, 756.f, 756.f, 1513.f, 1513.f, 1620
2929
static const PerfData ADA = {82.6f, 165.2f, 165.2f, 330.3f, 330.3f, 660.6f, 2520.f, 512.f};
3030
static const PerfData RDNA3 = {61.42f, 122.8f, 122.8f, -1.f, -1.f, -1.f, 2500.f, 384.f};
3131
static const PerfData CDNA2 = {95.7f, 383.0f, 383.0f, -1.f, -1.f, -1.f, 1690.f, 208.f};
32+
// Peak-throughput reference row for GCN5 (used by the "AMD Radeon VII" gpu_db entry).
// Same column layout as the other PerfData rows; -1.f marks entries with no figure provided.
// NOTE(review): 10.75 / 21.5 look like Radeon VII figures at its ~1400 MHz base clock, while the
// clock entry in this row is 1750 -- confirm against the spec sheet that both use the same clock.
static const PerfData GCN5 = {10.75f, 10.75f, 21.5f, -1.f, -1.f, -1.f, 1750.f, 60.f};
3233

3334
typedef struct {
3435
const char* name;
@@ -79,6 +80,7 @@ static GPUEntry gpu_db[] = {
7980
{"NVIDIA H100 80GB HBM3", &HOPPER, 528, 1830}, // HBM3 = SXM5
8081
{"Radeon RX 7900 XTX", &RDNA3, 384, 2500},
8182
{"AMD Instinct MI250X/MI250", &CDNA2, 208, 1690},
83+
{"AMD Radeon VII", &GCN5, 60, 1750},
8284
};
8385

8486
float get_flops_promised(const char* device, int precision_mode) {

0 commit comments

Comments
 (0)