
Commit f377f88

Merge branch 'master' into Nexes_CQ_10
2 parents e3ec684 + d09770c

34 files changed: +1013 -145 lines

.github/workflows/build.yml

Lines changed: 68 additions & 2 deletions
@@ -967,6 +967,7 @@ jobs:
           name: llama-bin-win-sycl-x64.zip
 
   windows-latest-cmake-hip:
+    if: ${{ github.event.inputs.create_release != 'true' }}
     runs-on: windows-latest
 
     steps:
@@ -994,8 +995,72 @@ jobs:
         run: |
           $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
           $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
-          cmake -G "Unix Makefiles" -B build -S . -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" -DGGML_HIPBLAS=ON
-          cmake --build build --config Release
+          cmake -G "Unix Makefiles" -B build -S . -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" -DGGML_HIPBLAS=ON -DCMAKE_BUILD_TYPE=Release -DGGML_RPC=ON
+          cmake --build build -j ${env:NUMBER_OF_PROCESSORS}
+
+  windows-latest-cmake-hip-release:
+    if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
+    runs-on: windows-latest
+
+    strategy:
+      matrix:
+        gpu_target: [gfx1100, gfx1101, gfx1030]
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+
+      - name: Install
+        id: depends
+        run: |
+          $ErrorActionPreference = "Stop"
+          write-host "Downloading AMD HIP SDK Installer"
+          Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
+          write-host "Installing AMD HIP SDK"
+          Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait
+          write-host "Completed AMD HIP SDK installation"
+
+      - name: Verify ROCm
+        id: verify
+        run: |
+          & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version
+
+      - name: Build
+        id: cmake_build
+        run: |
+          $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
+          $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
+          cmake -G "Unix Makefiles" -B build -S . -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" -DGGML_HIPBLAS=ON -DCMAKE_BUILD_TYPE=Release -DGPU_TARGETS=${{ matrix.gpu_target }} -DGGML_RPC=ON
+          cmake --build build -j ${env:NUMBER_OF_PROCESSORS}
+          md "build\bin\rocblas\library\"
+          cp "${env:HIP_PATH}\bin\hipblas.dll" "build\bin\"
+          cp "${env:HIP_PATH}\bin\rocblas.dll" "build\bin\"
+          cp "${env:HIP_PATH}\bin\rocblas\library\*" "build\bin\rocblas\library\"
+
+      - name: Determine tag name
+        id: tag
+        shell: bash
+        run: |
+          BUILD_NUMBER="$(git rev-list --count HEAD)"
+          SHORT_HASH="$(git rev-parse --short=7 HEAD)"
+          if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
+            echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
+          else
+            SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
+            echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
+          fi
+
+      - name: Pack artifacts
+        id: pack_artifacts
+        run: |
+          7z a llama-${{ steps.tag.outputs.name }}-bin-win-hip-x64-${{ matrix.gpu_target }}.zip .\build\bin\*
+
+      - name: Upload artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          path: llama-${{ steps.tag.outputs.name }}-bin-win-hip-x64-${{ matrix.gpu_target }}.zip
+          name: llama-bin-win-hip-x64-${{ matrix.gpu_target }}.zip
 
   ios-xcode-build:
     runs-on: macos-latest
@@ -1060,6 +1125,7 @@ jobs:
       - macOS-latest-cmake
       - windows-latest-cmake
       - windows-latest-cmake-cuda
+      - windows-latest-cmake-hip-release
       - macOS-latest-cmake-arm64
       - macOS-latest-cmake-x64

examples/infill/infill.cpp

Lines changed: 5 additions & 0 deletions
@@ -97,6 +97,11 @@ static void sigint_handler(int signo) {
             LOG("\n");
             gpt_perf_print(*g_ctx, *g_smpl);
             write_logfile(*g_ctx, *g_params, *g_model, *g_input_tokens, g_output_ss->str(), *g_output_tokens);
+
+            // make sure all logs are flushed
+            LOG("Interrupted by user\n");
+            gpt_log_pause(gpt_log_main());
+
             _exit(130);
         }
     }

examples/main/main.cpp

Lines changed: 5 additions & 0 deletions
@@ -116,6 +116,11 @@ static void sigint_handler(int signo) {
             LOG("\n");
             gpt_perf_print(*g_ctx, *g_smpl);
             write_logfile(*g_ctx, *g_params, *g_model, *g_input_tokens, g_output_ss->str(), *g_output_tokens);
+
+            // make sure all logs are flushed
+            LOG("Interrupted by user\n");
+            gpt_log_pause(gpt_log_main());
+
             _exit(130);
         }
     }
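The hunk above (mirrored in examples/infill/infill.cpp) makes the SIGINT handler emit a final log line and pause the logger so buffered output reaches its sink before _exit(130) skips normal teardown. A minimal standalone sketch of the same flush-before-_exit idea (illustrative only; plain stdio flushing stands in for gpt_log_pause(gpt_log_main()), which additionally stops the log worker thread):

// illustrative sketch: flush buffered output before _exit() on SIGINT
#include <csignal>
#include <cstdio>
#include <unistd.h>

static void sigint_handler(int /*signo*/) {
    // _exit() skips atexit handlers and stdio flushing, so flush explicitly;
    // llama.cpp instead pauses its logger, which also stops the log worker thread
    std::fprintf(stderr, "\nInterrupted by user\n");
    std::fflush(stdout);
    std::fflush(stderr);
    _exit(130); // 128 + SIGINT
}

int main() {
    std::signal(SIGINT, sigint_handler);
    for (;;) {
        std::printf("working...\n");
        sleep(1);
    }
}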

examples/perplexity/perplexity.cpp

Lines changed: 1 addition & 0 deletions
@@ -1961,6 +1961,7 @@ int main(int argc, char ** argv) {
 
     params.n_ctx = 512;
     params.logits_all = true;
+    params.escape = false;
 
     if (!gpt_params_parse(argc, argv, params, LLAMA_EXAMPLE_PERPLEXITY)) {
         return 1;

examples/quantize/quantize.cpp

Lines changed: 23 additions & 7 deletions
@@ -74,14 +74,24 @@ static const char * const LLM_KV_QUANTIZE_IMATRIX_DATASET = "quantize.imatrix.dataset";
 static const char * const LLM_KV_QUANTIZE_IMATRIX_N_ENTRIES = "quantize.imatrix.entries_count";
 static const char * const LLM_KV_QUANTIZE_IMATRIX_N_CHUNKS  = "quantize.imatrix.chunks_count";
 
+static bool striequals(const char * a, const char * b) {
+    while (*a && *b) {
+        if (std::tolower(*a) != std::tolower(*b)) {
+            return false;
+        }
+        a++; b++;
+    }
+    return *a == *b;
+}
+
 static bool try_parse_ftype(const std::string & ftype_str_in, llama_ftype & ftype, std::string & ftype_str_out) {
     std::string ftype_str;
 
     for (auto ch : ftype_str_in) {
         ftype_str.push_back(std::toupper(ch));
     }
     for (auto & it : QUANT_OPTIONS) {
-        if (it.name == ftype_str) {
+        if (striequals(it.name.c_str(), ftype_str.c_str())) {
             ftype = it.ftype;
             ftype_str_out = it.name;
             return true;
@@ -236,15 +246,15 @@ static int prepare_imatrix(const std::string & imatrix_file,
 }
 
 static ggml_type parse_ggml_type(const char * arg) {
-    ggml_type result = GGML_TYPE_COUNT;
-    for (int j = 0; j < GGML_TYPE_COUNT; ++j) {
-        auto type = ggml_type(j);
+    for (int i = 0; i < GGML_TYPE_COUNT; ++i) {
+        auto type = (ggml_type)i;
         const auto * name = ggml_type_name(type);
-        if (name && strcmp(arg, name) == 0) {
-            result = type; break;
+        if (name && striequals(name, arg)) {
+            return type;
         }
     }
-    return result;
+    fprintf(stderr, "%s: invalid ggml_type '%s'\n", __func__, arg);
+    return GGML_TYPE_COUNT;
 }
 
 int main(int argc, char ** argv) {
@@ -265,12 +275,18 @@ int main(int argc, char ** argv) {
     } else if (strcmp(argv[arg_idx], "--output-tensor-type") == 0) {
         if (arg_idx < argc-1) {
             params.output_tensor_type = parse_ggml_type(argv[++arg_idx]);
+            if (params.output_tensor_type == GGML_TYPE_COUNT) {
+                usage(argv[0]);
+            }
         } else {
             usage(argv[0]);
         }
     } else if (strcmp(argv[arg_idx], "--token-embedding-type") == 0) {
         if (arg_idx < argc-1) {
             params.token_embedding_type = parse_ggml_type(argv[++arg_idx]);
+            if (params.token_embedding_type == GGML_TYPE_COUNT) {
+                usage(argv[0]);
+            }
         } else {
             usage(argv[0]);
         }
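With striequals in place, ftype and ggml_type names are matched case-insensitively, and an unrecognized --output-tensor-type or --token-embedding-type value now exits through usage() instead of silently passing GGML_TYPE_COUNT along. A small self-contained check of the comparison (striequals copied from the hunk above; the main() driver is illustrative):

// self-contained check of the case-insensitive comparison used above
#include <cctype>
#include <cstdio>

static bool striequals(const char * a, const char * b) {
    while (*a && *b) {
        if (std::tolower(*a) != std::tolower(*b)) {
            return false;
        }
        a++; b++;
    }
    return *a == *b; // only true when both strings end together
}

int main() {
    std::printf("%d\n", striequals("q8_0", "Q8_0")); // 1: case is ignored
    std::printf("%d\n", striequals("q8",   "Q8_0")); // 0: lengths differ
    return 0;
}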

ggml/include/ggml-backend.h

Lines changed: 2 additions & 1 deletion
@@ -66,6 +66,7 @@ extern "C" {
     // "offset" refers to the offset of the tensor data for setting/getting data
     GGML_API GGML_CALL void ggml_backend_tensor_set( struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
     GGML_API GGML_CALL void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
+    GGML_API GGML_CALL void ggml_backend_tensor_memset( struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size);
 
     GGML_API void ggml_backend_synchronize(ggml_backend_t backend);
 
@@ -122,7 +123,7 @@ extern "C" {
     // The backend registry is a registry of all the available backends, and allows initializing backends in a generic way
 
     GGML_API size_t         ggml_backend_reg_get_count(void);
-    GGML_API size_t         ggml_backend_reg_find_by_name(const char * name);
+    GGML_API size_t         ggml_backend_reg_find_by_name(const char * name); // returns index of backend with name, or SIZE_MAX if not found
     GGML_API ggml_backend_t ggml_backend_reg_init_backend_from_str(const char * backend_str); // str is backend_name:params (params is optional)
     GGML_API const char *   ggml_backend_reg_get_name(size_t i);
     GGML_API ggml_backend_t ggml_backend_reg_init_backend(size_t i, const char * params); // params is backend-specific
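A short usage sketch for the new ggml_backend_tensor_memset entry point, zeroing a tensor that lives in a backend buffer (illustrative and not from this commit; it assumes the CPU backend, whose buffers implement memset_tensor as of this change):

// illustrative: zero a tensor allocated in a backend buffer
#include "ggml.h"
#include "ggml-alloc.h"
#include "ggml-backend.h"

int main(void) {
    struct ggml_init_params ip = {
        /*.mem_size   =*/ ggml_tensor_overhead() * 8,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ true, // tensor data goes into the backend buffer
    };
    struct ggml_context * ctx = ggml_init(ip);
    struct ggml_tensor  * t   = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1024);

    ggml_backend_t        backend = ggml_backend_cpu_init();
    ggml_backend_buffer_t buf     = ggml_backend_alloc_ctx_tensors(ctx, backend);

    // asserts if the buffer's iface.memset_tensor is NULL (backend without support)
    ggml_backend_tensor_memset(t, 0, 0, ggml_nbytes(t));

    ggml_backend_buffer_free(buf);
    ggml_backend_free(backend);
    ggml_free(ctx);
    return 0;
}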

ggml/include/ggml.h

Lines changed: 32 additions & 8 deletions
@@ -534,6 +534,7 @@ extern "C" {
 
         GGML_OP_CROSS_ENTROPY_LOSS,
         GGML_OP_CROSS_ENTROPY_LOSS_BACK,
+        GGML_OP_OPT_STEP_ADAMW,
 
         GGML_OP_COUNT,
     };
@@ -571,10 +572,12 @@ extern "C" {
         GGML_LOG_LEVEL_DEBUG = 4,
     };
 
+    // this tensor...
     enum ggml_tensor_flag {
-        GGML_TENSOR_FLAG_INPUT  = 1,
-        GGML_TENSOR_FLAG_OUTPUT = 2,
-        GGML_TENSOR_FLAG_PARAM  = 4,
+        GGML_TENSOR_FLAG_INPUT  = 1, // ...is an input for the GGML compute graph
+        GGML_TENSOR_FLAG_OUTPUT = 2, // ...is an output for the GGML compute graph
+        GGML_TENSOR_FLAG_PARAM  = 4, // ...contains trainable parameters
+        GGML_TENSOR_FLAG_LOSS   = 8, // ...defines loss for numerical optimization (multiple loss tensors add up)
     };
 
     // n-dimensional tensor
@@ -2037,23 +2040,44 @@ extern "C" {
             struct ggml_tensor  * b,
             struct ggml_tensor  * c);
 
+    // AdamW optimizer step
+    // Paper: https://arxiv.org/pdf/1711.05101v3.pdf
+    // PyTorch: https://pytorch.org/docs/stable/generated/torch.optim.AdamW.html
+    GGML_API struct ggml_tensor * ggml_opt_step_adamw(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            float                 alpha,
+            float                 beta1,
+            float                 beta2,
+            float                 eps,
+            float                 wd); // weight decay
+
     //
     // automatic differentiation
     //
 
-    GGML_API void ggml_set_param(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * tensor);
+    GGML_API void ggml_set_param(struct ggml_context * ctx, struct ggml_tensor * tensor);
+    GGML_API void ggml_set_loss(struct ggml_tensor * tensor);
 
     GGML_API void ggml_build_forward_expand (struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
-    GGML_API void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph * gf, struct ggml_cgraph * gb, bool keep);
+    GGML_API void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph * gf, struct ggml_cgraph * gb, bool accumulate, bool keep);
+
+    GGML_API void ggml_build_opt_adamw(
+            struct ggml_context * ctx,
+            struct ggml_cgraph  * gf,
+            struct ggml_cgraph  * gb,
+            float                 alpha,
+            float                 beta1,
+            float                 beta2,
+            float                 eps,
+            float                 wd); // weight decay
 
     // graph allocation in a context
     GGML_API struct ggml_cgraph * ggml_new_graph       (struct ggml_context * ctx); // size = GGML_DEFAULT_GRAPH_SIZE, grads = false
     GGML_API struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t size, bool grads);
     GGML_API struct ggml_cgraph * ggml_graph_dup       (struct ggml_context * ctx, struct ggml_cgraph * cgraph);
     GGML_API void                 ggml_graph_cpy       (struct ggml_cgraph * src, struct ggml_cgraph * dst);
-    GGML_API void                 ggml_graph_reset     (struct ggml_cgraph * cgraph); // zero grads
+    GGML_API void                 ggml_graph_reset     (struct ggml_cgraph * cgraph); // set regular grads + optimizer momenta to 0, set loss grad to 1
     GGML_API void                 ggml_graph_clear     (struct ggml_cgraph * cgraph);
 
     GGML_API int                  ggml_graph_size      (struct ggml_cgraph * cgraph);
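Together with GGML_TENSOR_FLAG_LOSS and ggml_set_loss, these declarations outline a training step: mark parameters, mark the loss, expand the backward graph, append AdamW updates, then reset and recompute each iteration. A rough sketch of that flow against this revision of the API (a toy least-squares fit; the allocation strategy and hyperparameters are assumptions, not taken from this commit):

// rough sketch: toy least-squares fit using the new optimizer hooks
#include "ggml.h"

int main(void) {
    struct ggml_init_params ip = { 64*1024*1024, NULL, false }; // generous CPU pool
    struct ggml_context * ctx = ggml_init(ip);

    struct ggml_tensor * x = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4); // inputs
    struct ggml_tensor * y = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4); // targets
    struct ggml_tensor * w = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4); // weights
    ggml_set_param(ctx, w); // trainable (GGML_TENSOR_FLAG_PARAM)

    // loss = sum((w*x - y)^2), flagged via GGML_TENSOR_FLAG_LOSS
    struct ggml_tensor * loss = ggml_sum(ctx, ggml_sqr(ctx,
            ggml_sub(ctx, ggml_mul(ctx, w, x), y)));
    ggml_set_loss(loss);

    struct ggml_cgraph * gf = ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, /*grads =*/ true);
    ggml_build_forward_expand(gf, loss);

    struct ggml_cgraph * gb = ggml_graph_dup(ctx, gf);
    ggml_build_backward_expand(ctx, gf, gb, /*accumulate =*/ false, /*keep =*/ true);
    // append one AdamW update per parameter tensor to the backward graph
    ggml_build_opt_adamw(ctx, gf, gb, /*alpha =*/ 1e-3f, /*beta1 =*/ 0.9f,
                         /*beta2 =*/ 0.999f, /*eps =*/ 1e-8f, /*wd =*/ 0.0f);

    ggml_set_f32(x, 1.0f);
    ggml_set_f32(y, 2.0f);
    ggml_set_f32(w, 0.0f);

    for (int step = 0; step < 100; step++) {
        ggml_graph_reset(gb); // grads + momenta to 0, loss grad to 1
        ggml_graph_compute_with_ctx(ctx, gb, /*n_threads =*/ 1);
    }

    ggml_free(ctx);
    return 0;
}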

ggml/src/ggml-alloc.c

Lines changed: 6 additions & 0 deletions
@@ -294,6 +294,12 @@ static void ggml_dyn_tallocr_reset(struct ggml_dyn_tallocr * alloc) {
     alloc->free_blocks[0].offset = 0;
     alloc->free_blocks[0].size = SIZE_MAX/2; // restrict maximum size of a measure allocator to half size_t max to avoid overflows
     alloc->max_size = 0;
+
+#ifdef GGML_ALLOCATOR_DEBUG
+    for (int i = 0; i < 1024; i++) {
+        alloc->allocated_tensors[i].tensor = NULL;
+    }
+#endif
 }
 
 static struct ggml_dyn_tallocr * ggml_dyn_tallocr_new(size_t alignment) {

ggml/src/ggml-backend-impl.h

Lines changed: 10 additions & 9 deletions
@@ -38,15 +38,16 @@ extern "C" {
 typedef void * ggml_backend_buffer_context_t;
 
 struct ggml_backend_buffer_i {
-    const char * (*GGML_CALL get_name)   (ggml_backend_buffer_t buffer);
-    void         (*GGML_CALL free_buffer)(ggml_backend_buffer_t buffer);
-    void *       (*GGML_CALL get_base)   (ggml_backend_buffer_t buffer);
-    void         (*GGML_CALL init_tensor)(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
-    void         (*GGML_CALL set_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
-    void         (*GGML_CALL get_tensor) (ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
-    bool         (*GGML_CALL cpy_tensor) (ggml_backend_buffer_t buffer, const struct ggml_tensor * src, struct ggml_tensor * dst); // dst is in the buffer, src may be in any buffer
-    void         (*GGML_CALL clear)      (ggml_backend_buffer_t buffer, uint8_t value);
-    void         (*GGML_CALL reset)      (ggml_backend_buffer_t buffer); // reset any internal state due to tensor initialization, such as tensor extras
+    const char * (*GGML_CALL get_name)      (ggml_backend_buffer_t buffer);
+    void         (*GGML_CALL free_buffer)   (ggml_backend_buffer_t buffer);
+    void *       (*GGML_CALL get_base)      (ggml_backend_buffer_t buffer);
+    void         (*GGML_CALL init_tensor)   (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
+    void         (*GGML_CALL memset_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size);
+    void         (*GGML_CALL set_tensor)    (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
+    void         (*GGML_CALL get_tensor)    (ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
+    bool         (*GGML_CALL cpy_tensor)    (ggml_backend_buffer_t buffer, const struct ggml_tensor * src, struct ggml_tensor * dst); // dst is in the buffer, src may be in any buffer
+    void         (*GGML_CALL clear)         (ggml_backend_buffer_t buffer, uint8_t value);
+    void         (*GGML_CALL reset)         (ggml_backend_buffer_t buffer); // reset any internal state due to tensor initialization, such as tensor extras
 };
 
 struct ggml_backend_buffer {

ggml/src/ggml-backend.c

Lines changed: 25 additions & 0 deletions
@@ -246,6 +246,22 @@ GGML_CALL void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void *
     buf->iface.get_tensor(buf, tensor, data, offset, size);
 }
 
+GGML_API GGML_CALL void ggml_backend_tensor_memset(struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size) {
+    ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
+
+    GGML_ASSERT(buf != NULL && "tensor buffer not set");
+    GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
+    GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor write out of bounds");
+
+    if (!size) {
+        return;
+    }
+
+    GGML_ASSERT(buf->iface.memset_tensor != NULL && "memset not supported by backend buffer");
+
+    buf->iface.memset_tensor(buf, tensor, value, offset, size);
+}
+
 void ggml_backend_synchronize(ggml_backend_t backend) {
     if (backend->iface.synchronize == NULL) {
         return;
@@ -569,6 +585,12 @@ GGML_CALL static void ggml_backend_cpu_buffer_free_buffer(ggml_backend_buffer_t
     free(buffer->context);
 }
 
+GGML_CALL static void ggml_backend_cpu_buffer_memset_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size) {
+    memset((char *)tensor->data + offset, value, size);
+
+    GGML_UNUSED(buffer);
+}
+
 GGML_CALL static void ggml_backend_cpu_buffer_set_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
     memcpy((char *)tensor->data + offset, data, size);
 
@@ -600,6 +622,7 @@ static struct ggml_backend_buffer_i cpu_backend_buffer_i = {
     /* .free_buffer   = */ ggml_backend_cpu_buffer_free_buffer,
     /* .get_base      = */ ggml_backend_cpu_buffer_get_base,
     /* .init_tensor   = */ NULL, // no initialization required
+    /* .memset_tensor = */ ggml_backend_cpu_buffer_memset_tensor,
     /* .set_tensor    = */ ggml_backend_cpu_buffer_set_tensor,
     /* .get_tensor    = */ ggml_backend_cpu_buffer_get_tensor,
     /* .cpy_tensor    = */ ggml_backend_cpu_buffer_cpy_tensor,
@@ -613,6 +636,7 @@ static struct ggml_backend_buffer_i cpu_backend_buffer_i_from_ptr = {
     /* .free_buffer   = */ NULL, // ptr is not owned by the buffer, so it does not need to be freed
     /* .get_base      = */ ggml_backend_cpu_buffer_get_base,
     /* .init_tensor   = */ NULL, // no initialization required
+    /* .memset_tensor = */ ggml_backend_cpu_buffer_memset_tensor,
     /* .set_tensor    = */ ggml_backend_cpu_buffer_set_tensor,
     /* .get_tensor    = */ ggml_backend_cpu_buffer_get_tensor,
     /* .cpy_tensor    = */ ggml_backend_cpu_buffer_cpy_tensor,
@@ -980,6 +1004,7 @@ static struct ggml_backend_buffer_i ggml_backend_multi_buffer_context_interface(
     /* .free_buffer   = */ ggml_backend_multi_buffer_free_buffer,
     /* .get_base      = */ NULL,
     /* .init_tensor   = */ NULL,
+    /* .memset_tensor = */ NULL,
     /* .set_tensor    = */ NULL,
     /* .get_tensor    = */ NULL,
     /* .cpy_tensor    = */ NULL,
