
Commit 98fce50

Initial commit
0 parents  commit 98fce50

500 files changed, +275960 -0 lines changed

.clang-tidy

Lines changed: 23 additions & 0 deletions
@@ -0,0 +1,23 @@
---
Checks: >
    bugprone-*,
    -bugprone-easily-swappable-parameters,
    -bugprone-implicit-widening-of-multiplication-result,
    -bugprone-misplaced-widening-cast,
    -bugprone-narrowing-conversions,
    readability-*,
    -readability-avoid-unconditional-preprocessor-if,
    -readability-function-cognitive-complexity,
    -readability-identifier-length,
    -readability-implicit-bool-conversion,
    -readability-magic-numbers,
    -readability-uppercase-literal-suffix,
    clang-analyzer-*,
    -clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling,
    performance-*,
    portability-*,
    misc-*,
    -misc-const-correctness,
    -misc-non-private-member-variables-in-classes,
    -misc-no-recursion,
FormatStyle: none
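
With this file at the repository root, clang-tidy picks up the check list automatically. A minimal sketch of invoking it by hand on one translation unit (the target file and compile flags below are illustrative assumptions, not part of this commit):

# Everything after "--" is passed to the compiler so clang-tidy can parse the file;
# the .clang-tidy at the repo root supplies the enabled checks.
clang-tidy llama.cpp -- -I. -std=c++11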

.devops/cloud-v-pipeline

Lines changed: 22 additions & 0 deletions
@@ -0,0 +1,22 @@
node('x86_runner1'){                         // Running on x86 runner containing latest vector qemu, latest vector gcc and all the necessary libraries
    stage('Cleanup'){
        cleanWs()                            // Cleaning previous CI build in workspace
    }
    stage('checkout repo'){
        retry(5){                            // Retry if the cloning fails due to some reason
            checkout scm                     // Clone the repo on Runner
        }
    }
    stage('Compiling llama.cpp'){
        sh '''#!/bin/bash
        make RISCV=1 RISCV_CROSS_COMPILE=1   # Compiling llama for RISC-V
        '''
    }
    stage('Running llama.cpp'){
        sh '''#!/bin/bash
        module load gnu-bin2/0.1             # loading latest versions of vector qemu and vector gcc
        qemu-riscv64 -L /softwares/gnu-bin2/sysroot -cpu rv64,v=true,vlen=256,elen=64,vext_spec=v1.0 ./main -m /home/alitariq/codellama-7b.Q4_K_M.gguf -p "Anything" -n 9 > llama_log.txt   # Running llama.cpp on vector qemu-riscv64
        cat llama_log.txt                    # Printing results
        '''
    }
}
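
The same cross-compile-and-emulate flow can be reproduced outside Jenkins. A rough sketch, assuming a RISC-V cross toolchain and qemu-riscv64 are installed locally (the sysroot and model paths below are placeholders, not the CI runner's actual environment):

# Cross-compile llama.cpp for RISC-V with the vector extension, as the pipeline does
make RISCV=1 RISCV_CROSS_COMPILE=1

# Emulate an RV64 CPU with 256-bit vectors; -L points qemu at the cross sysroot
qemu-riscv64 -L /path/to/riscv/sysroot \
    -cpu rv64,v=true,vlen=256,elen=64,vext_spec=v1.0 \
    ./main -m /path/to/model.gguf -p "Hello" -n 9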

.devops/full-cuda.Dockerfile

Lines changed: 34 additions & 0 deletions
@@ -0,0 +1,34 @@
ARG UBUNTU_VERSION=22.04

# This needs to generally match the container host's environment.
ARG CUDA_VERSION=11.7.1

# Target the CUDA build image
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}

FROM ${BASE_CUDA_DEV_CONTAINER} as build

# Unless otherwise specified, we make a fat build.
ARG CUDA_DOCKER_ARCH=all

RUN apt-get update && \
    apt-get install -y build-essential python3 python3-pip git

COPY requirements.txt requirements.txt
COPY requirements requirements

RUN pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt

WORKDIR /app

COPY . .

# Set nvcc architecture
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
# Enable cuBLAS
ENV LLAMA_CUBLAS=1

RUN make

ENTRYPOINT ["/app/.devops/tools.sh"]
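
A sketch of building and running this image, assuming an NVIDIA host with the NVIDIA Container Toolkit installed; the image tag and model path are placeholders, and the --run flag assumes the run mode of the tools.sh entrypoint (not shown in this excerpt):

# Build from the repository root; override CUDA_DOCKER_ARCH to narrow the fat build
docker build -t llama-cpp-full-cuda -f .devops/full-cuda.Dockerfile .

# Expose the GPUs and mount a directory containing models
docker run --gpus all -v /path/to/models:/models llama-cpp-full-cuda \
    --run -m /models/model.gguf -p "Hello" -n 32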

.devops/full-rocm.Dockerfile

Lines changed: 45 additions & 0 deletions
@@ -0,0 +1,45 @@
ARG UBUNTU_VERSION=22.04

# This needs to generally match the container host's environment.
ARG ROCM_VERSION=5.6

# Target the ROCm build image
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete

FROM ${BASE_ROCM_DEV_CONTAINER} as build

# Unless otherwise specified, we make a fat build.
# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
# This is mostly tied to rocBLAS supported archs.
ARG ROCM_DOCKER_ARCH=\
    gfx803 \
    gfx900 \
    gfx906 \
    gfx908 \
    gfx90a \
    gfx1010 \
    gfx1030 \
    gfx1100 \
    gfx1101 \
    gfx1102

COPY requirements.txt requirements.txt
COPY requirements requirements

RUN pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt

WORKDIR /app

COPY . .

# Set ROCm GPU targets
ENV GPU_TARGETS=${ROCM_DOCKER_ARCH}
# Enable ROCm
ENV LLAMA_HIPBLAS=1
ENV CC=/opt/rocm/llvm/bin/clang
ENV CXX=/opt/rocm/llvm/bin/clang++

RUN make

ENTRYPOINT ["/app/.devops/tools.sh"]
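
The ROCm image builds the same way but is run with device mappings instead of --gpus. A sketch under the same placeholder assumptions as the CUDA example above:

docker build -t llama-cpp-full-rocm -f .devops/full-rocm.Dockerfile .

# ROCm containers need the kernel fusion driver and DRI devices from the host
docker run --device /dev/kfd --device /dev/dri \
    -v /path/to/models:/models llama-cpp-full-rocm \
    --run -m /models/model.gguf -p "Hello" -n 32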

.devops/full.Dockerfile

Lines changed: 22 additions & 0 deletions
@@ -0,0 +1,22 @@
ARG UBUNTU_VERSION=22.04

FROM ubuntu:$UBUNTU_VERSION as build

RUN apt-get update && \
    apt-get install -y build-essential python3 python3-pip git

COPY requirements.txt requirements.txt
COPY requirements requirements

RUN pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt

WORKDIR /app

COPY . .

RUN make

ENV LC_ALL=C.utf8

ENTRYPOINT ["/app/.devops/tools.sh"]
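
The CPU-only image needs no GPU runtime flags. A sketch, with the same placeholder tag, model path, and assumed tools.sh --run mode as above:

docker build -t llama-cpp-full -f .devops/full.Dockerfile .

docker run -v /path/to/models:/models llama-cpp-full \
    --run -m /models/model.gguf -p "Hello" -n 32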
.devops/llama-cpp-clblast.srpm.spec

Lines changed: 84 additions & 0 deletions
@@ -0,0 +1,84 @@
# SRPM for building from source and packaging an RPM for RPM-based distros.
# https://fedoraproject.org/wiki/How_to_create_an_RPM_package
# Built and maintained by John Boero - [email protected]
# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal

# Notes for llama.cpp:
# 1. Tags are currently based on hash - which will not sort asciibetically.
#    We need to declare standard versioning if people want to sort latest releases.
# 2. Builds for CUDA/OpenCL support are separate, with different dependencies.
# 3. NVIDIA's developer repo must be enabled with nvcc, cublas, clblas, etc. installed.
#    Example: https://developer.download.nvidia.com/compute/cuda/repos/fedora37/x86_64/cuda-fedora37.repo
# 4. OpenCL/CLBlast support simply requires the ICD loader and basic OpenCL libraries.
#    It is up to the user to install the correct vendor-specific support.

Name:           llama.cpp-clblast
Version:        %( date "+%%Y%%m%%d" )
Release:        1%{?dist}
Summary:        OpenCL Inference of LLaMA model in C/C++
License:        MIT
Source0:        https://github.com/ggerganov/llama.cpp/archive/refs/heads/master.tar.gz
BuildRequires:  coreutils make gcc-c++ git mesa-libOpenCL-devel clblast-devel
Requires:       clblast
URL:            https://github.com/ggerganov/llama.cpp

%define debug_package %{nil}
%define source_date_epoch_from_changelog 0

%description
OpenCL inference for Meta's LLaMA 2 models using default options.

%prep
%setup -n llama.cpp-master

%build
make -j LLAMA_CLBLAST=1

%install
mkdir -p %{buildroot}%{_bindir}/
cp -p main %{buildroot}%{_bindir}/llamaclblast
cp -p server %{buildroot}%{_bindir}/llamaclblastserver
cp -p simple %{buildroot}%{_bindir}/llamaclblastsimple

mkdir -p %{buildroot}/usr/lib/systemd/system
%{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llamaclblast.service
[Unit]
Description=Llama.cpp server (OpenCL/CLBlast build).
After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target

[Service]
Type=simple
EnvironmentFile=/etc/sysconfig/llama
ExecStart=/usr/bin/llamaclblastserver $LLAMA_ARGS
ExecReload=/bin/kill -s HUP $MAINPID
Restart=never

[Install]
WantedBy=default.target
EOF

mkdir -p %{buildroot}/etc/sysconfig
%{__cat} <<EOF > %{buildroot}/etc/sysconfig/llama
LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin"
EOF

%clean
rm -rf %{buildroot}
rm -rf %{_builddir}/*

%files
%{_bindir}/llamaclblast
%{_bindir}/llamaclblastserver
%{_bindir}/llamaclblastsimple
/usr/lib/systemd/system/llamaclblast.service
%config /etc/sysconfig/llama

%pre

%post

%preun
%postun

%changelog
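
A sketch of turning this spec into an installable RPM with the standard Fedora tooling (rpmdev-setuptree and spectool come from the rpmdevtools package; the tarball named by Source0 is fetched into ~/rpmbuild/SOURCES):

rpmdev-setuptree                                      # create the ~/rpmbuild tree
spectool -g -R .devops/llama-cpp-clblast.srpm.spec    # download Source0 into SOURCES
rpmbuild -ba .devops/llama-cpp-clblast.srpm.spec      # build binary and source RPMs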

.devops/llama-cpp-cublas.srpm.spec

Lines changed: 83 additions & 0 deletions
@@ -0,0 +1,83 @@
# SRPM for building from source and packaging an RPM for RPM-based distros.
# https://fedoraproject.org/wiki/How_to_create_an_RPM_package
# Built and maintained by John Boero - [email protected]
# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal

# Notes for llama.cpp:
# 1. Tags are currently based on hash - which will not sort asciibetically.
#    We need to declare standard versioning if people want to sort latest releases.
# 2. Builds for CUDA/OpenCL support are separate, with different dependencies.
# 3. NVIDIA's developer repo must be enabled with nvcc, cublas, clblas, etc. installed.
#    Example: https://developer.download.nvidia.com/compute/cuda/repos/fedora37/x86_64/cuda-fedora37.repo
# 4. OpenCL/CLBlast support simply requires the ICD loader and basic OpenCL libraries.
#    It is up to the user to install the correct vendor-specific support.

Name:           llama.cpp-cublas
Version:        %( date "+%%Y%%m%%d" )
Release:        1%{?dist}
Summary:        CUDA Inference of LLaMA model in C/C++
License:        MIT
Source0:        https://github.com/ggerganov/llama.cpp/archive/refs/heads/master.tar.gz
BuildRequires:  coreutils make gcc-c++ git cuda-toolkit
Requires:       cuda-toolkit
URL:            https://github.com/ggerganov/llama.cpp

%define debug_package %{nil}
%define source_date_epoch_from_changelog 0

%description
CUDA inference for Meta's LLaMA 2 models using default options.

%prep
%setup -n llama.cpp-master

%build
make -j LLAMA_CUBLAS=1

%install
mkdir -p %{buildroot}%{_bindir}/
cp -p main %{buildroot}%{_bindir}/llamacppcublas
cp -p server %{buildroot}%{_bindir}/llamacppcublasserver
cp -p simple %{buildroot}%{_bindir}/llamacppcublassimple

mkdir -p %{buildroot}/usr/lib/systemd/system
%{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llamacublas.service
[Unit]
Description=Llama.cpp server (CUDA/cuBLAS build).
After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target

[Service]
Type=simple
EnvironmentFile=/etc/sysconfig/llama
ExecStart=/usr/bin/llamacppcublasserver $LLAMA_ARGS
ExecReload=/bin/kill -s HUP $MAINPID
Restart=never

[Install]
WantedBy=default.target
EOF

mkdir -p %{buildroot}/etc/sysconfig
%{__cat} <<EOF > %{buildroot}/etc/sysconfig/llama
LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin"
EOF

%clean
rm -rf %{buildroot}
rm -rf %{_builddir}/*

%files
%{_bindir}/llamacppcublas
%{_bindir}/llamacppcublasserver
%{_bindir}/llamacppcublassimple
/usr/lib/systemd/system/llamacublas.service
%config /etc/sysconfig/llama

%pre

%post

%preun
%postun

%changelog
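
Once the cublas RPM is installed, the bundled unit is driven with systemd as usual. A sketch, assuming /etc/sysconfig/llama has first been edited to point LLAMA_ARGS at a real model file:

sudo systemctl daemon-reload
sudo systemctl enable --now llamacublas.service   # start the server and enable it at boot
journalctl -u llamacublas.service -f              # follow the server log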
