
Commit 7b37ef1

Merge branch 'master' into layla-build
2 parents: 32d6b24 + b730706

258 files changed: 25,363 additions, 7,330 deletions


.devops/cloud-v-pipeline

Lines changed: 0 additions & 22 deletions
This file was deleted.

.devops/cpu.Dockerfile

Lines changed: 1 addition & 5 deletions
```diff
@@ -4,19 +4,15 @@ FROM ubuntu:$UBUNTU_VERSION AS build
 
 ARG TARGETARCH
 
-ARG GGML_CPU_ARM_ARCH=armv8-a
-
 RUN apt-get update && \
     apt-get install -y build-essential git cmake libcurl4-openssl-dev
 
 WORKDIR /app
 
 COPY . .
 
-RUN if [ "$TARGETARCH" = "amd64" ]; then \
+RUN if [ "$TARGETARCH" = "amd64" ] || [ "$TARGETARCH" = "arm64" ]; then \
         cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DLLAMA_BUILD_TESTS=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON; \
-    elif [ "$TARGETARCH" = "arm64" ]; then \
-        cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DLLAMA_BUILD_TESTS=OFF -DGGML_CPU_ARM_ARCH=${GGML_CPU_ARM_ARCH}; \
     else \
         echo "Unsupported architecture"; \
         exit 1; \
```
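With this change both amd64 and arm64 go through `GGML_BACKEND_DL=ON` and `GGML_CPU_ALL_VARIANTS=ON`, so one image ships several CPU variants and selects the best one at load time instead of pinning arm64 to a single `GGML_CPU_ARM_ARCH`. A minimal sketch of cross-building the image for arm64 (assumes Docker Buildx with QEMU binfmt is set up; the tag name is illustrative):

```bash
# Hypothetical tag; cross-building requires Buildx + QEMU binfmt
docker buildx build --platform linux/arm64 \
    -f .devops/cpu.Dockerfile \
    -t llama-cpp:cpu-arm64 .
```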

.devops/cuda.Dockerfile

Lines changed: 1 addition & 1 deletion
```diff
@@ -61,7 +61,7 @@ RUN apt-get update \
     python3 \
     python3-pip \
     && pip install --upgrade pip setuptools wheel \
-    && pip install -r requirements.txt \
+    && pip install --break-system-packages -r requirements.txt \
     && apt autoremove -y \
     && apt clean -y \
     && rm -rf /tmp/* /var/tmp/* \
```
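The new flag addresses PEP 668: Ubuntu 24.04 marks its system Python as externally managed, so a bare `pip install` into it aborts. Inside a disposable image layer, overriding that guard is the usual fix; a sketch of the failure mode and the override:

```bash
# On an externally managed system Python (e.g. Ubuntu 24.04):
pip install -r requirements.txt    # fails with: error: externally-managed-environment

# Acceptable inside a throwaway container layer:
pip install --break-system-packages -r requirements.txt
```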

.devops/vulkan.Dockerfile

Lines changed: 23 additions & 7 deletions
```diff
@@ -2,14 +2,30 @@ ARG UBUNTU_VERSION=24.04
 
 FROM ubuntu:$UBUNTU_VERSION AS build
 
-# Install build tools
-RUN apt update && apt install -y git build-essential cmake wget
+# Ref: https://vulkan.lunarg.com/doc/sdk/latest/linux/getting_started.html
 
-# Install Vulkan SDK and cURL
-RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
-    wget -qO /etc/apt/sources.list.d/lunarg-vulkan-noble.list https://packages.lunarg.com/vulkan/lunarg-vulkan-noble.list && \
-    apt update -y && \
-    apt-get install -y vulkan-sdk libcurl4-openssl-dev curl
+# Install build tools
+RUN apt update && apt install -y git build-essential cmake wget xz-utils
+
+# Install Vulkan SDK
+ARG VULKAN_VERSION=1.4.321.1
+RUN ARCH=$(uname -m) && \
+    wget -qO /tmp/vulkan-sdk.tar.xz https://sdk.lunarg.com/sdk/download/${VULKAN_VERSION}/linux/vulkan-sdk-linux-${ARCH}-${VULKAN_VERSION}.tar.xz && \
+    mkdir -p /opt/vulkan && \
+    tar -xf /tmp/vulkan-sdk.tar.xz -C /tmp --strip-components=1 && \
+    mv /tmp/${ARCH}/* /opt/vulkan/ && \
+    rm -rf /tmp/*
+
+# Install cURL and Vulkan SDK dependencies
+RUN apt install -y libcurl4-openssl-dev curl \
+    libxcb-xinput0 libxcb-xinerama0 libxcb-cursor-dev
+
+# Set environment variables
+ENV VULKAN_SDK=/opt/vulkan
+ENV PATH=$VULKAN_SDK/bin:$PATH
+ENV LD_LIBRARY_PATH=$VULKAN_SDK/lib:$LD_LIBRARY_PATH
+ENV CMAKE_PREFIX_PATH=$VULKAN_SDK:$CMAKE_PREFIX_PATH
+ENV PKG_CONFIG_PATH=$VULKAN_SDK/lib/pkgconfig:$PKG_CONFIG_PATH
 
 # Build it
 WORKDIR /app
```
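Since the SDK now comes from a LunarG tarball rather than the apt repository, the `ENV` lines do the work the distro package used to do: they put the SDK's binaries, libraries, and CMake/pkg-config metadata on the standard search paths. A quick smoke test of the resulting image (the image name is a placeholder; `glslc` ships in the SDK and needs no GPU):

```bash
# <image> is a placeholder for the built image tag
docker run --rm <image> sh -c 'echo "$VULKAN_SDK" && glslc --version'
```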

.github/ISSUE_TEMPLATE/010-bug-compilation.yml

Lines changed: 1 addition & 1 deletion
```diff
@@ -40,7 +40,7 @@ body:
     attributes:
       label: GGML backends
       description: Which GGML backends do you know to be affected?
-      options: [AMX, BLAS, CPU, CUDA, HIP, Metal, Musa, RPC, SYCL, Vulkan, OpenCL]
+      options: [AMX, BLAS, CPU, CUDA, HIP, Metal, Musa, RPC, SYCL, Vulkan, OpenCL, zDNN]
       multiple: true
     validations:
       required: true
```

.github/ISSUE_TEMPLATE/011-bug-results.yml

Lines changed: 1 addition & 1 deletion
```diff
@@ -42,7 +42,7 @@ body:
     attributes:
       label: GGML backends
       description: Which GGML backends do you know to be affected?
-      options: [AMX, BLAS, CPU, CUDA, HIP, Metal, Musa, RPC, SYCL, Vulkan, OpenCL]
+      options: [AMX, BLAS, CPU, CUDA, HIP, Metal, Musa, RPC, SYCL, Vulkan, OpenCL, zDNN]
       multiple: true
     validations:
       required: true
```

.github/copilot-instructions.md

Lines changed: 262 additions & 0 deletions
# Copilot Instructions for llama.cpp

## Repository Overview

llama.cpp is a large-scale C/C++ project for efficient LLM (Large Language Model) inference with minimal setup and dependencies. The project enables running language models on diverse hardware with state-of-the-art performance.

**Key Facts:**
- **Primary language**: C/C++ with Python utility scripts
- **Size**: ~200k lines of code across 1000+ files
- **Architecture**: Modular design with the main library (`libllama`) and 40+ executable tools/examples
- **Core dependency**: ggml tensor library (vendored in the `ggml/` directory)
- **Backends supported**: CPU (AVX/NEON optimized), CUDA, Metal, Vulkan, SYCL, ROCm, MUSA
- **License**: MIT

## Build Instructions

### Prerequisites
- CMake 3.14+ (primary build system)
- C++17-compatible compiler (GCC 13.3+, Clang, MSVC)
- Optional: ccache for faster compilation

### Basic Build (CPU-only)
**ALWAYS run these commands in sequence:**
```bash
cmake -B build
cmake --build build --config Release -j $(nproc)
```

**Build time**: ~10 minutes on a 4-core system with ccache enabled, ~25 minutes without.

**Important Notes:**
- The Makefile is deprecated - always use CMake
- ccache is automatically detected and used if available (see the sketch after this list)
- Built binaries are placed in `build/bin/`
- Parallel builds (`-j`) significantly reduce build time
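If ccache is installed but not picked up, it can be forced through CMake's standard launcher variables; a sketch (unnecessary when auto-detection works):

```bash
# Route compiler invocations through ccache explicitly
cmake -B build -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
cmake --build build --config Release -j $(nproc)
```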
### Backend-Specific Builds
For CUDA support:
```bash
cmake -B build -DGGML_CUDA=ON
cmake --build build --config Release -j $(nproc)
```

For Metal (macOS):
```bash
cmake -B build -DGGML_METAL=ON
cmake --build build --config Release -j $(nproc)
```

**Important Note**: Any backend can be built as long as its requirements are installed, but it cannot be run without the matching hardware. The only backend that can be exercised for testing and validation is the CPU backend.

### Debug Builds
Single-config generators:
```bash
cmake -B build -DCMAKE_BUILD_TYPE=Debug
cmake --build build
```

Multi-config generators:
```bash
cmake -B build -G "Xcode"
cmake --build build --config Debug
```

### Common Build Issues
- **Issue**: Network tests fail in isolated environments
  **Solution**: Expected behavior - the core functionality tests will still pass

## Testing

### Running Tests
```bash
ctest --test-dir build --output-on-failure -j $(nproc)
```

**Test suite**: 38 tests covering tokenizers, grammar parsing, sampling, backends, and integration
**Expected failures**: 2-3 tests may fail if network access is unavailable (they download models)
**Test time**: ~30 seconds for passing tests

### Server Unit Tests
Run server-specific unit tests after building the server:
```bash
# Build the server first
cmake --build build --target llama-server

# Navigate to server tests and run
cd tools/server/tests
source ../../../.venv/bin/activate
./tests.sh
```
**Server test dependencies**: The `.venv` environment includes the required dependencies for server unit tests (pytest, aiohttp, etc.). Tests can be run individually or with various options as documented in `tools/server/tests/README.md`.

### Test Categories
- Tokenizer tests: various model tokenizers (BERT, GPT-2, LLaMA, etc.)
- Grammar tests: GBNF parsing and validation
- Backend tests: core ggml operations across different backends
- Integration tests: end-to-end workflows

### Manual Testing Commands
```bash
# Test basic inference
./build/bin/llama-cli --version

# Test model loading (requires model file)
./build/bin/llama-cli -m path/to/model.gguf -p "Hello" -n 10
```

## Code Quality and Linting

### C++ Code Formatting
**ALWAYS format C++ code before committing:**
```bash
git clang-format
```

Configuration is in `.clang-format` with these key rules (a dry-run check is sketched after the list):
- 4-space indentation
- 120-column limit
- Braces on the same line for functions
- Pointer alignment: `void * ptr` (middle)
- Reference alignment: `int & ref` (middle)
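A sketch of checking conformance without rewriting files (these flags exist in clang-format 10+; the file path is just an example):

```bash
# Report violations without modifying the file; non-zero exit on any diff
clang-format --dry-run --Werror src/llama.cpp

# Format only the lines you changed relative to master
git clang-format master
```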
### Python Code
**ALWAYS activate the Python environment in `.venv` and use tools from that environment:**
```bash
# Activate virtual environment
source .venv/bin/activate
```

Configuration files:
- `.flake8`: flake8 settings (max-line-length=125, excludes examples/tools)
- `pyrightconfig.json`: pyright type-checking configuration

### Pre-commit Hooks
Run before committing:
```bash
pre-commit run --all-files
```

## Continuous Integration

### GitHub Actions Workflows
Key workflows that run on every PR:
- `.github/workflows/build.yml`: multi-platform builds
- `.github/workflows/server.yml`: server functionality tests
- `.github/workflows/python-lint.yml`: Python code quality
- `.github/workflows/python-type-check.yml`: Python type checking

### Local CI Validation
**Run the full CI locally before submitting PRs:**
```bash
mkdir tmp

# CPU-only build
bash ./ci/run.sh ./tmp/results ./tmp/mnt
```

**CI Runtime**: 30-60 minutes depending on backend configuration

### Triggering CI
Add `ggml-ci` to the commit message to trigger heavy CI workloads on the custom CI infrastructure.
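For example (the message text is illustrative; only the `ggml-ci` token is significant):

```bash
git commit -m "ggml: fix CPU backend build (ggml-ci)"
```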
## Project Layout and Architecture

### Core Directories
- **`src/`**: Main llama library implementation (`llama.cpp`, `llama-*.cpp`)
- **`include/`**: Public API headers, primarily `include/llama.h`
- **`ggml/`**: Core tensor library (vendored GGML framework)
- **`examples/`**: 30+ example applications and tools
- **`tools/`**: Additional development and utility tools (server benchmarks, tests)
- **`tests/`**: Comprehensive test suite with CTest integration
- **`docs/`**: Detailed documentation (build guides, API docs, etc.)
- **`scripts/`**: Utility scripts for CI, data processing, and automation
- **`common/`**: Shared utility code used across examples

### Key Files
- **`CMakeLists.txt`**: Primary build configuration
- **`include/llama.h`**: Main C API header (~2000 lines)
- **`src/llama.cpp`**: Core library implementation (~8000 lines)
- **`CONTRIBUTING.md`**: Coding guidelines and PR requirements
- **`.clang-format`**: C++ formatting rules
- **`.pre-commit-config.yaml`**: Git hook configuration

### Built Executables (in `build/bin/`)
Primary tools (a server smoke test is sketched after this list):
- **`llama-cli`**: Main inference tool
- **`llama-server`**: OpenAI-compatible HTTP server
- **`llama-quantize`**: Model quantization utility
- **`llama-perplexity`**: Model evaluation tool
- **`llama-bench`**: Performance benchmarking
- **`llama-convert-llama2c-to-ggml`**: Model conversion utility
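A quick smoke test of the server's OpenAI-compatible API (model path and port are placeholders):

```bash
# Start the server on a placeholder model and port
./build/bin/llama-server -m path/to/model.gguf --port 8080 &

# Query the OpenAI-compatible chat endpoint
curl http://localhost:8080/v1/chat/completions \
    -H "Content-Type: application/json" \
    -d '{"messages":[{"role":"user","content":"Hello"}],"max_tokens":16}'
```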
### Configuration Files
- **CMake**: `CMakeLists.txt`, `cmake/` directory
- **Linting**: `.clang-format`, `.clang-tidy`, `.flake8`
- **CI**: `.github/workflows/`, `ci/run.sh`
- **Git**: `.gitignore` (covers build artifacts, models, cache)

### Dependencies
- **System**: OpenMP, libcurl (for model downloading)
- **Optional**: CUDA SDK, Metal framework, Vulkan SDK, Intel oneAPI
- **Bundled**: httplib, json (vendored header-only libraries)

## Common Validation Steps

### After Making Changes
1. **Format code**: `git clang-format`
2. **Build**: `cmake --build build --config Release`
3. **Test**: `ctest --test-dir build --output-on-failure`
4. **Server tests** (if modifying the server): `cd tools/server/tests && source ../../../.venv/bin/activate && ./tests.sh`
5. **Manual validation**: test the relevant tools in `build/bin/`

### Performance Validation
```bash
# Benchmark inference performance
./build/bin/llama-bench -m model.gguf

# Evaluate model perplexity
./build/bin/llama-perplexity -m model.gguf -f dataset.txt
```

### Backend Validation
```bash
# Test backend operations
./build/bin/test-backend-ops
```

## Environment Setup

### Required Tools
- CMake 3.14+ (install via system package manager)
- Modern C++ compiler with C++17 support
- Git
- Python 3.9+ with a virtual environment (`.venv` is provided)

### Optional but Recommended
- ccache: `apt install ccache` or `brew install ccache`
- clang-format 15+: usually included with an LLVM/Clang installation
- pre-commit: `pip install pre-commit`

### Backend-Specific Requirements
- **CUDA**: NVIDIA CUDA Toolkit 11.2+
- **Metal**: Xcode command line tools (macOS only)
- **Vulkan**: Vulkan SDK
- **SYCL**: Intel oneAPI toolkit

## Important Guidelines

### Code Changes
- **Minimal dependencies**: avoid adding new external dependencies
- **Cross-platform compatibility**: test on Linux, macOS, and Windows when possible
- **Performance focus**: this is a performance-critical inference library
- **API stability**: changes to `include/llama.h` require careful consideration

### Git Workflow
- Always create feature branches from `master`
- **Never** commit build artifacts (`build/`, `.ccache/`, `*.o`, `*.gguf`)
- Use descriptive commit messages following project conventions

### Trust These Instructions
Only search for additional information if these instructions are incomplete or found to be incorrect. This document contains validated build and test procedures that work reliably across different environments.

.github/labeler.yml

Lines changed: 5 additions & 0 deletions
```diff
@@ -22,6 +22,11 @@ Vulkan:
     - any-glob-to-any-file:
       - ggml/include/ggml-vulkan.h
       - ggml/src/ggml-vulkan/**
+IBM zDNN:
+  - changed-files:
+    - any-glob-to-any-file:
+      - ggml/include/ggml-zdnn.h
+      - ggml/src/ggml-zdnn/**
 documentation:
   - changed-files:
     - any-glob-to-any-file:
```
