Commit 2a9564d

Merge pull request #76 from l3utterfly/master

merge from upstream

2 parents e38d2a6 + aaa088d


65 files changed: +30313 additions, −888 deletions

.github/workflows/build.yml

Lines changed: 1 addition & 1 deletion

@@ -342,7 +342,7 @@ jobs:
           cd build
           export GGML_VK_VISIBLE_DEVICES=0
           # This is using llvmpipe and runs slower than other backends
-          ctest -L main --verbose --timeout 3600
+          ctest -L main --verbose --timeout 4200
 
   ubuntu-22-cmake-hip:
     runs-on: ubuntu-22.04

.github/workflows/update-ops-docs.yml

Lines changed: 40 additions & 0 deletions

@@ -0,0 +1,40 @@
+name: Update Operations Documentation
+
+on:
+  push:
+    paths:
+      - 'docs/ops/**'
+      - 'scripts/create_ops_docs.py'
+  pull_request:
+    paths:
+      - 'docs/ops/**'
+      - 'scripts/create_ops_docs.py'
+
+jobs:
+  update-ops-docs:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.x'
+
+      - name: Generate operations documentation to temporary file
+        run: |
+          mkdir -p /tmp/ops_check
+          ./scripts/create_ops_docs.py /tmp/ops_check/ops.md
+
+      - name: Check if docs/ops.md matches generated version
+        run: |
+          if ! diff -q docs/ops.md /tmp/ops_check/ops.md; then
+            echo "Operations documentation (docs/ops.md) is not up to date with the backend CSV files."
+            echo "To fix: run ./scripts/create_ops_docs.py and commit the updated docs/ops.md along with your changes"
+            echo "Differences found:"
+            diff docs/ops.md /tmp/ops_check/ops.md || true
+            exit 1
+          fi
+          echo "Operations documentation is up to date."

README.md

Lines changed: 4 additions & 5 deletions

@@ -6,9 +6,9 @@
 [![Release](https://img.shields.io/github/v/release/ggml-org/llama.cpp)](https://github.com/ggml-org/llama.cpp/releases)
 [![Server](https://github.com/ggml-org/llama.cpp/actions/workflows/server.yml/badge.svg)](https://github.com/ggml-org/llama.cpp/actions/workflows/server.yml)
 
-[Roadmap](https://github.com/users/ggerganov/projects/7) / [Manifesto](https://github.com/ggml-org/llama.cpp/discussions/205) / [ggml](https://github.com/ggml-org/ggml)
+[Manifesto](https://github.com/ggml-org/llama.cpp/discussions/205) / [ggml](https://github.com/ggml-org/ggml) / [ops](https://github.com/ggml-org/llama.cpp/blob/master/docs/ops.md)
 
-Inference of Meta's [LLaMA](https://arxiv.org/abs/2302.13971) model (and others) in pure C/C++
+LLM inference in C/C++
 
 ## Recent API changes
 
@@ -17,10 +17,9 @@ Inference of Meta's [LLaMA](https://arxiv.org/abs/2302.13971) model (and others)
 
 ## Hot topics
 
-- 🔥 Multimodal support arrived in `llama-server`: [#12898](https://github.com/ggml-org/llama.cpp/pull/12898) | [documentation](./docs/multimodal.md)
-- A new binary `llama-mtmd-cli` is introduced to replace `llava-cli`, `minicpmv-cli`, `gemma3-cli` ([#13012](https://github.com/ggml-org/llama.cpp/pull/13012)) and `qwen2vl-cli` ([#13141](https://github.com/ggml-org/llama.cpp/pull/13141)), `libllava` will be deprecated
+- Hot PRs: [All](https://github.com/ggml-org/llama.cpp/pulls?q=is%3Apr+label%3Ahot+) | [Open](https://github.com/ggml-org/llama.cpp/pulls?q=is%3Apr+label%3Ahot+is%3Aopen)
+- Multimodal support arrived in `llama-server`: [#12898](https://github.com/ggml-org/llama.cpp/pull/12898) | [documentation](./docs/multimodal.md)
 - VS Code extension for FIM completions: https://github.com/ggml-org/llama.vscode
-- Universal [tool call support](./docs/function-calling.md) in `llama-server` https://github.com/ggml-org/llama.cpp/pull/9639
 - Vim/Neovim plugin for FIM completions: https://github.com/ggml-org/llama.vim
 - Introducing GGUF-my-LoRA https://github.com/ggml-org/llama.cpp/discussions/10123
 - Hugging Face Inference Endpoints now support GGUF out of the box! https://github.com/ggml-org/llama.cpp/discussions/9669

common/CMakeLists.txt

Lines changed: 4 additions & 5 deletions

@@ -117,8 +117,7 @@ if (LLAMA_CURL)
     endif()
     target_compile_definitions(${TARGET} PUBLIC LLAMA_USE_CURL)
     include_directories(${CURL_INCLUDE_DIRS})
-    find_library(CURL_LIBRARY curl REQUIRED)
-    set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} ${CURL_LIBRARY})
+    set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} ${CURL_LIBRARIES})
 endif ()
 
 if (LLAMA_LLGUIDANCE)
@@ -143,13 +142,13 @@ if (LLAMA_LLGUIDANCE)
 
     ExternalProject_Add(llguidance_ext
         GIT_REPOSITORY https://github.com/guidance-ai/llguidance
-        # v0.7.20 (+ fix to build on GCC 15):
-        GIT_TAG b5b8b64dba11c4e4ee6b1d1450d3a3ae279891e8
+        # v1.0.1:
+        GIT_TAG d795912fedc7d393de740177ea9ea761e7905774
         PREFIX ${CMAKE_BINARY_DIR}/llguidance
         SOURCE_DIR ${LLGUIDANCE_SRC}
         BUILD_IN_SOURCE TRUE
         CONFIGURE_COMMAND ""
-        BUILD_COMMAND cargo build --release
+        BUILD_COMMAND cargo build --release --package llguidance
         INSTALL_COMMAND ""
         BUILD_BYPRODUCTS ${LLGUIDANCE_PATH}/${LLGUIDANCE_LIB_NAME} ${LLGUIDANCE_PATH}/llguidance.h
         UPDATE_COMMAND ""
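
For context: CURL_INCLUDE_DIRS and CURL_LIBRARIES are the variables populated by CMake's stock FindCURL module, so the change above drops the redundant find_library() call in favor of what find_package(CURL) already provides. A minimal configure sketch (the LLAMA_CURL flag comes from the guard above; the build directory name is illustrative):

  # enable the curl-dependent code path guarded by if (LLAMA_CURL)
  cmake -B build -DLLAMA_CURL=ON
  cmake --build build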

common/arg.cpp

Lines changed: 7 additions & 0 deletions

@@ -2734,6 +2734,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             params.public_path = value;
         }
     ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_STATIC_PATH"));
+    add_opt(common_arg(
+        {"--api-prefix"}, "PREFIX",
+        string_format("prefix path the server serves from, without the trailing slash (default: %s)", params.api_prefix.c_str()),
+        [](common_params & params, const std::string & value) {
+            params.api_prefix = value;
+        }
+    ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_API_PREFIX"));
     add_opt(common_arg(
         {"--no-webui"},
         string_format("Disable the Web UI (default: %s)", params.webui ? "enabled" : "disabled"),

common/common.h

Lines changed: 1 addition & 0 deletions

@@ -370,6 +370,7 @@ struct common_params {
 
     std::string hostname = "127.0.0.1";
     std::string public_path = ""; // NOLINT
+    std::string api_prefix = ""; // NOLINT
     std::string chat_template = ""; // NOLINT
     bool use_jinja = false; // NOLINT
     bool enable_chat_template = true;
