
Commit 087cc0b

committed May 24, 2024
feat: Update llama.cpp
1 parent 5a595f0 commit 087cc0b

File tree

2 files changed: +17 -1 lines changed


llama_cpp/llama_cpp.py

Lines changed: 16 additions & 0 deletions
@@ -2265,6 +2265,22 @@ def llama_set_n_threads(
     ...
 
 
+# // Get the number of threads used for generation of a single token.
+# LLAMA_API uint32_t llama_n_threads(struct llama_context * ctx);
+@ctypes_function("llama_n_threads", [llama_context_p_ctypes], ctypes.c_uint32)
+def llama_n_threads(ctx: llama_context_p, /) -> int:
+    """Get the number of threads used for generation of a single token"""
+    ...
+
+
+# // Get the number of threads used for prompt and batch processing (multiple token).
+# LLAMA_API uint32_t llama_n_threads_batch(struct llama_context * ctx);
+@ctypes_function("llama_n_threads_batch", [llama_context_p_ctypes], ctypes.c_uint32)
+def llama_n_threads_batch(ctx: llama_context_p, /) -> int:
+    """Get the number of threads used for prompt and batch processing (multiple token)"""
+    ...
+
+
 # // Set whether to use causal attention or not
 # // If set to true, the model will only attend to the past tokens
 # LLAMA_API void llama_set_causal_attn(struct llama_context * ctx, bool causal_attn);
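
The two new bindings read back the thread counts configured on an existing context. Below is a minimal usage sketch (not part of the commit) that goes through the low-level API exported by llama_cpp; the model path is a placeholder and the thread counts are arbitrary values chosen for illustration.

```python
import llama_cpp

llama_cpp.llama_backend_init()

# Load a model and create a context with explicit thread settings.
model_params = llama_cpp.llama_model_default_params()
model = llama_cpp.llama_load_model_from_file(b"./models/model.gguf", model_params)  # placeholder path

ctx_params = llama_cpp.llama_context_default_params()
ctx_params.n_threads = 4        # threads used for single-token generation
ctx_params.n_threads_batch = 8  # threads used for prompt/batch processing
ctx = llama_cpp.llama_new_context_with_model(model, ctx_params)

# The new getters return the thread counts currently set on the context.
print(llama_cpp.llama_n_threads(ctx))        # expected: 4
print(llama_cpp.llama_n_threads_batch(ctx))  # expected: 8

llama_cpp.llama_free(ctx)
llama_cpp.llama_free_model(model)
llama_cpp.llama_backend_free()
```

llama_n_threads should echo the n_threads value from the context parameters, and llama_n_threads_batch the n_threads_batch value, mirroring the existing llama_set_n_threads setter directly above the added lines.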

vendor/llama.cpp

Lines changed: 1 addition & 1 deletion (submodule commit updated)
