Skip to content

Commit 54cdd47

Browse files
committed
ggml : allow ggml_get_rows to use multiple threads if they are available
1 parent 529e749 commit 54cdd47

File tree

1 file changed

+6
-6
lines changed

ggml.c

+6-6
Original file line number · Diff line number · Diff line change
@@ -17814,7 +17814,7 @@ static void ggml_graph_compute_perf_stats_node(struct ggml_tensor * node, const
 17814 17814          node->perf_time_us += time_us_cur;
 17815 17815      }
 17816 17816
 17817       - static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
       17817 + static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads, int n_cur_threads) {
 17818 17818      int n_tasks = 0;
 17819 17819
 17820 17820      switch (node->op) {
@@ -17899,7 +17899,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
 17899 17899          {
 17900 17900              // FIXME: the cost of launching additional threads decreases performance with GPU offloading
 17901 17901              //n_tasks = MIN(n_threads, ggml_nelements(node->src[1]));
 17902       -            n_tasks = 1;
       17902 +            n_tasks = MIN(n_cur_threads, ggml_nelements(node->src[1]));
 17903 17903          } break;
 17904 17904      case GGML_OP_SCALE:
 17905 17905      case GGML_OP_SET:
@@ -18125,7 +18125,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
 18125 18125          /* FINALIZE */
 18126 18126          struct ggml_tensor * node = cgraph->nodes[node_n];
 18127 18127          if (GGML_OP_HAS_FINALIZE[node->op]) {
 18128       -            params.nth = ggml_get_n_tasks(node, n_threads);
       18128 +            params.nth = ggml_get_n_tasks(node, n_threads, state->shared->n_threads);
 18129 18129              ggml_compute_forward(&params, node);
 18130 18130          }
 18131 18131          ggml_graph_compute_perf_stats_node(node, state->shared);
@@ -18135,7 +18135,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
 18135 18135      while (++node_n < cgraph->n_nodes) {
 18136 18136          GGML_PRINT_DEBUG_5("%s: %d/%d\n", __func__, node_n, cgraph->n_nodes);
 18137 18137          struct ggml_tensor * node = cgraph->nodes[node_n];
 18138       -        const int n_tasks = ggml_get_n_tasks(node, n_threads);
       18138 +        const int n_tasks = ggml_get_n_tasks(node, n_threads, state->shared->n_threads);
 18139 18139
 18140 18140          state->shared->perf_node_start_cycles  = ggml_perf_cycles();
 18141 18141          state->shared->perf_node_start_time_us = ggml_perf_time_us();
@@ -18183,7 +18183,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
 18183 18183
 18184 18184          /* INIT & COMPUTE */
 18185 18185          struct ggml_tensor * node = cgraph->nodes[node_n];
 18186       -        const int n_tasks = ggml_get_n_tasks(node, n_threads);
       18186 +        const int n_tasks = ggml_get_n_tasks(node, n_threads, state->shared->n_threads);
 18187 18187
 18188 18188          struct ggml_compute_params params = {
 18189 18189              /*.type =*/ GGML_TASK_TYPE_INIT,
@@ -18248,7 +18248,7 @@ struct ggml_cplan ggml_graph_plan(const struct ggml_cgraph * cgraph, int n_threa
 18248 18248      for (int i = 0; i < cgraph->n_nodes; i++) {
 18249 18249          struct ggml_tensor * node = cgraph->nodes[i];
 18250 18250
 18251       -        const int n_tasks = ggml_get_n_tasks(node, n_threads);
       18251 +        const int n_tasks = ggml_get_n_tasks(node, n_threads, 1);
 18252 18252
 18253 18253          max_tasks = MAX(max_tasks, n_tasks);
 18254 18254
0 commit comments

Comments
 (0)