
Commit ecac76d

Update ggml files
1 parent 332e6e5 commit ecac76d

12 files changed: 18,869 additions and 13 deletions

Makefile

Lines changed: 205 additions & 0 deletions
@@ -0,0 +1,205 @@
# Define the default target now so that it is always the first target
default: main quantize

ifndef UNAME_S
UNAME_S := $(shell uname -s)
endif

ifndef UNAME_P
UNAME_P := $(shell uname -p)
endif

ifndef UNAME_M
UNAME_M := $(shell uname -m)
endif

CCV := $(shell $(CC) --version | head -n 1)
CXXV := $(shell $(CXX) --version | head -n 1)

# Mac OS + Arm can report x86_64
# ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789
ifeq ($(UNAME_S),Darwin)
	ifneq ($(UNAME_P),arm)
		SYSCTL_M := $(shell sysctl -n hw.optional.arm64 2>/dev/null)
		ifeq ($(SYSCTL_M),1)
			# UNAME_P := arm
			# UNAME_M := arm64
			warn := $(warning Your arch is announced as x86_64, but it seems to actually be ARM64. Not fixing that can lead to bad performance. For more info see: https://github.com/ggerganov/whisper.cpp/issues/66\#issuecomment-1282546789)
		endif
	endif
endif

#
# Compile flags
#

# keep standard at C11 and C++11
CFLAGS   = -I. -O3 -std=c11 -fPIC
CXXFLAGS = -I. -I./examples -O3 -std=c++11 -fPIC
LDFLAGS  =

ifndef LLAMA_DEBUG
	CFLAGS   += -DNDEBUG
	CXXFLAGS += -DNDEBUG
endif

# warnings
CFLAGS   += -Wall -Wextra -Wpedantic -Wcast-qual -Wdouble-promotion -Wshadow -Wstrict-prototypes -Wpointer-arith
CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-multichar

# OS specific
# TODO: support Windows
ifeq ($(UNAME_S),Linux)
	CFLAGS   += -pthread
	CXXFLAGS += -pthread
endif
ifeq ($(UNAME_S),Darwin)
	CFLAGS   += -pthread
	CXXFLAGS += -pthread
endif
ifeq ($(UNAME_S),FreeBSD)
	CFLAGS   += -pthread
	CXXFLAGS += -pthread
endif
ifeq ($(UNAME_S),NetBSD)
	CFLAGS   += -pthread
	CXXFLAGS += -pthread
endif
ifeq ($(UNAME_S),OpenBSD)
	CFLAGS   += -pthread
	CXXFLAGS += -pthread
endif
ifeq ($(UNAME_S),Haiku)
	CFLAGS   += -pthread
	CXXFLAGS += -pthread
endif

ifdef LLAMA_GPROF
	CFLAGS   += -pg
	CXXFLAGS += -pg
endif
ifdef LLAMA_PERF
	CFLAGS   += -DGGML_PERF
	CXXFLAGS += -DGGML_PERF
endif

# Architecture specific
# TODO: probably these flags need to be tweaked on some architectures
# feel free to update the Makefile for your architecture and send a pull request or issue
ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686))
	# Use all CPU extensions that are available:
	CFLAGS   += -march=native -mtune=native
	CXXFLAGS += -march=native -mtune=native

	# Usage AVX-only
	#CFLAGS   += -mfma -mf16c -mavx
	#CXXFLAGS += -mfma -mf16c -mavx
endif
ifneq ($(filter ppc64%,$(UNAME_M)),)
	POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)
	ifneq (,$(findstring POWER9,$(POWER9_M)))
		CFLAGS   += -mcpu=power9
		CXXFLAGS += -mcpu=power9
	endif
	# Require c++23's std::byteswap for big-endian support.
	ifeq ($(UNAME_M),ppc64)
		CXXFLAGS += -std=c++23 -DGGML_BIG_ENDIAN
	endif
endif
ifndef LLAMA_NO_ACCELERATE
	# Mac M1 - include Accelerate framework.
	# `-framework Accelerate` works on Mac Intel as well, with negligible performance boost (as of the predict time).
	ifeq ($(UNAME_S),Darwin)
		CFLAGS  += -DGGML_USE_ACCELERATE
		LDFLAGS += -framework Accelerate
	endif
endif
ifdef LLAMA_OPENBLAS
	CFLAGS += -DGGML_USE_OPENBLAS -I/usr/local/include/openblas
	ifneq ($(shell grep -e "Arch Linux" -e "ID_LIKE=arch" /etc/os-release 2>/dev/null),)
		LDFLAGS += -lopenblas -lcblas
	else
		LDFLAGS += -lopenblas
	endif
endif
ifdef LLAMA_CUBLAS
	CFLAGS    += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include
	CXXFLAGS  += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include
	LDFLAGS   += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib
	OBJS      += ggml-cuda.o
	NVCC      = nvcc
	NVCCFLAGS = --forward-unknown-to-host-compiler -arch=native
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
	$(NVCC) $(NVCCFLAGS) $(CXXFLAGS) -Wno-pedantic -c $< -o $@
endif
ifdef LLAMA_CLBLAST
	CFLAGS += -DGGML_USE_CLBLAST
	# Mac provides OpenCL as a framework
	ifeq ($(UNAME_S),Darwin)
		LDFLAGS += -lclblast -framework OpenCL
	else
		LDFLAGS += -lclblast -lOpenCL
	endif
	OBJS += ggml-opencl.o
ggml-opencl.o: ggml-opencl.c ggml-opencl.h
	$(CC) $(CFLAGS) -c $< -o $@
endif
ifneq ($(filter aarch64%,$(UNAME_M)),)
	# Apple M1, M2, etc.
	# Raspberry Pi 3, 4, Zero 2 (64-bit)
	CFLAGS   += -mcpu=native
	CXXFLAGS += -mcpu=native
endif
ifneq ($(filter armv6%,$(UNAME_M)),)
	# Raspberry Pi 1, Zero
	CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
endif
ifneq ($(filter armv7%,$(UNAME_M)),)
	# Raspberry Pi 2
	CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
endif
ifneq ($(filter armv8%,$(UNAME_M)),)
	# Raspberry Pi 3, 4, Zero 2 (32-bit)
	CFLAGS += -mfp16-format=ieee -mno-unaligned-access
endif

#
# Print build information
#

$(info I llama.cpp build info: )
$(info I UNAME_S: $(UNAME_S))
$(info I UNAME_P: $(UNAME_P))
$(info I UNAME_M: $(UNAME_M))
$(info I CFLAGS: $(CFLAGS))
$(info I CXXFLAGS: $(CXXFLAGS))
$(info I LDFLAGS: $(LDFLAGS))
$(info I CC: $(CCV))
$(info I CXX: $(CXXV))
$(info )

#
# Build library
#

ggml.o: ggml.c ggml.h ggml-cuda.h
	$(CC) $(CFLAGS) -c $< -o $@

common.o: common.cpp common.h
	$(CXX) $(CXXFLAGS) -c $< -o $@

clean:
	rm -vf *.o main quantize

#
# Examples
#

main: main.cpp common.o ggml.o $(OBJS)
	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
	@echo
	@echo '==== Run ./main -h for help. ===='
	@echo

quantize: quantize.cpp ggml.o common.o $(OBJS)
	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
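To make the optional switches in this Makefile concrete, here is a minimal usage sketch. The variable names (LLAMA_DEBUG, LLAMA_OPENBLAS, LLAMA_CUBLAS, LLAMA_CLBLAST) come directly from the ifdef blocks above; the specific invocations are illustrative, not prescribed by the commit.

```
# Default CPU build; the first target builds both `main` and `quantize`
make

# Optional backends, enabled by defining the corresponding Makefile variable
make LLAMA_OPENBLAS=1   # link against OpenBLAS
make LLAMA_CUBLAS=1     # compile ggml-cuda.o with nvcc and link against cuBLAS
make LLAMA_CLBLAST=1    # compile ggml-opencl.o and link against CLBlast/OpenCL

# Keep assertions enabled for debugging (skips -DNDEBUG)
make clean && make LLAMA_DEBUG=1
```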

README.md

Lines changed: 7 additions & 8 deletions
@@ -72,17 +72,16 @@ git clone https://github.com/ggerganov/ggml
 cd ggml
 
 # Convert HF model to ggml
-python examples/starcoder/convert-hf-to-ggml.py bigcode/gpt_bigcode-santacoder
+python convert-hf-to-ggml.py bigcode/gpt_bigcode-santacoder
 
-# Build ggml + examples
-mkdir build && cd build
-cmake .. && make -j4 starcoder starcoder-quantize
+# Build ggml libraries
+make
 
 # quantize the model
-./bin/starcoder-quantize ../models/bigcode/gpt_bigcode-santacoder-ggml.bin ../models/bigcode/gpt_bigcode-santacoder-ggml-q4_1.bin 3
+./quantize models/bigcode/gpt_bigcode-santacoder-ggml.bin models/bigcode/gpt_bigcode-santacoder-ggml-q4_1.bin 3
 
 # run inference
-./bin/starcoder -m ../models/bigcode/gpt_bigcode-santacoder-ggml-q4_1.bin -p "def fibonacci(" --top_k 0 --top_p 0.95 --temp 0.2
+./main -m models/bigcode/gpt_bigcode-santacoder-ggml-q4_1.bin -p "def fibonacci(" --top_k 0 --top_p 0.95 --temp 0.2
 ```
 
 
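The inference command in the hunk above only sets the sampling options. Below is a sketch of a fuller invocation, assuming `main` reuses the argument parser from common.cpp in this commit (-t for thread count, -n for tokens to predict); `./main -h` is the authoritative reference.

```
# -t (threads) and -n (tokens to predict) are assumed from common.cpp; verify with ./main -h
./main -m models/bigcode/gpt_bigcode-santacoder-ggml-q4_1.bin \
       -p "def fibonacci(" -t 8 -n 128 --top_k 0 --top_p 0.95 --temp 0.2
```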

@@ -92,7 +91,7 @@ You can download the original model and convert it to `ggml` format using the sc
 
 ```
 # Convert HF model to ggml
-python examples/starcoder/convert-hf-to-ggml.py bigcode/gpt_bigcode-santacoder
+python convert-hf-to-ggml.py bigcode/gpt_bigcode-santacoder
 ```
 
 This conversion requires that you have python and Transformers installed on your computer.
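For completeness, a minimal environment sketch implied by that sentence; the README only names Transformers, and torch is an assumption here (HF checkpoints are usually loaded through PyTorch).

```
# Assumed prerequisites; torch is not named in the README
pip install transformers torch
python convert-hf-to-ggml.py bigcode/gpt_bigcode-santacoder
```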
@@ -103,7 +102,7 @@ You can also try to quantize the `ggml` models via 4-bit integer quantization.
 
 ```
 # quantize the model
-./bin/starcoder-quantize ../models/bigcode/gpt_bigcode-santacoder-ggml.bin ../models/bigcode/gpt_bigcode-santacoder-ggml-q4_1.bin 3
+./quantize models/bigcode/gpt_bigcode-santacoder-ggml.bin models/bigcode/gpt_bigcode-santacoder-ggml-q4_1.bin 3
 ```
 
 | Model | Original size | Quantized size | Quantization type |
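A brief note on the quantize invocation in the hunk above: the trailing positional argument selects the quantization type. Mapping `2` to `q4_0` and `3` to `q4_1` follows the usual convention of the ggml example quantizers, but that mapping is an assumption here; quantize.cpp in this commit is the authoritative source.

```
# Assumed type mapping (2 = q4_0, 3 = q4_1); confirm against quantize.cpp
./quantize models/bigcode/gpt_bigcode-santacoder-ggml.bin \
           models/bigcode/gpt_bigcode-santacoder-ggml-q4_0.bin 2
```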
