
Commit b141fc2

ggml: fix ggml_conv_1d_dw bug (#1323)
* ggml: fix ggml_conv_1d_dw bug
* Fixed conv1d_dw weight tensor dimension.
1 parent c765c8f commit b141fc2
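
In short: the previous ggml_conv_1d_dw reshaped the weight tensor a to 4-D before handing it to ggml_im2col, and then reshaped the final result using the input tensor b's dimensions. As the diff to src/ggml.c below shows, the fix passes a to ggml_im2col unchanged and takes the result shape from the ggml_mul_mat output itself, which matches the (kernel_size, 1, channels) weight layout exercised by the new tests.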

File tree

4 files changed, +506 -3 lines changed


src/ggml.c

Lines changed: 2 additions & 3 deletions
@@ -4272,14 +4272,13 @@ struct ggml_tensor * ggml_conv_1d_dw(
         int                   s0,
         int                   p0,
         int                   d0) {
-    struct ggml_tensor * new_a = ggml_reshape_4d(ctx, a, a->ne[0], 1, a->ne[1], a->ne[2]);
     struct ggml_tensor * new_b = ggml_reshape_4d(ctx, b, b->ne[0], 1, b->ne[1], b->ne[2]);
 
-    struct ggml_tensor * im2col = ggml_im2col(ctx, new_a, new_b, s0, 0, p0, 0, d0, 0, false, GGML_TYPE_F16);
+    struct ggml_tensor * im2col = ggml_im2col(ctx, a, new_b, s0, 0, p0, 0, d0, 0, false, GGML_TYPE_F16);
 
     struct ggml_tensor * result = ggml_mul_mat(ctx, im2col, a);
 
-    result = ggml_reshape_3d(ctx, result, b->ne[0], b->ne[1], 1);
+    result = ggml_reshape_3d(ctx, result, result->ne[0], result->ne[2], 1);
 
     return result;
 }
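
To make the intended semantics concrete, here is a minimal CPU reference for the depthwise conv1d, written as a naive loop. This sketch is not part of the commit: the helper conv1d_dw_ref is hypothetical, and it assumes the channel-major, PyTorch-style weight layout used by the new test below (ne[0] = K is the contiguous dimension). With the test's data it reproduces the expected output.

#include <cstdio>
#include <vector>

// Naive depthwise conv1d reference: one filter per channel (groups == channels).
// weight is laid out channel-major, weight[c*K + k], matching the (K, 1, IC)
// ggml tensor used in tests/test-conv1d-dw-c1.cpp (assumption, not from the commit).
static std::vector<float> conv1d_dw_ref(const std::vector<float> & input,  // IC x IL, channel-major
                                        const std::vector<float> & weight, // IC x K,  channel-major
                                        int IL, int IC, int K,
                                        int s0, int p0, int d0) {
    const int OL = (IL + 2*p0 - d0*(K - 1) - 1)/s0 + 1; // standard conv output length
    std::vector<float> out(IC*OL, 0.0f);
    for (int c = 0; c < IC; ++c) {
        for (int t = 0; t < OL; ++t) {
            float acc = 0.0f;
            for (int k = 0; k < K; ++k) {
                const int i = t*s0 + k*d0 - p0; // input index; may fall in the zero padding
                if (i >= 0 && i < IL) {
                    acc += weight[c*K + k]*input[c*IL + i];
                }
            }
            out[c*OL + t] = acc;
        }
    }
    return out;
}

int main() {
    // same data as tests/test-conv1d-dw-c1.cpp: K = 3, IC = 2, IL = 6, all-ones input
    const std::vector<float> weight = {10.0f, 20.0f, 30.0f, 0.1f, 0.2f, 0.3f};
    const std::vector<float> input(12, 1.0f);
    for (float v : conv1d_dw_ref(input, weight, 6, 2, 3, /*s0=*/1, /*p0=*/1, /*d0=*/1)) {
        printf("%g ", v); // expected: 50 60 60 60 60 30 0.5 0.6 0.6 0.6 0.6 0.3
    }
    printf("\n");
    return 0;
}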

tests/CMakeLists.txt

Lines changed: 18 additions & 0 deletions
@@ -300,6 +300,24 @@ if (NOT GGML_BACKEND_DL)
     add_test(NAME ${TEST_TARGET} COMMAND $<TARGET_FILE:${TEST_TARGET}>)
     set_property(TEST ${TEST_TARGET} PROPERTY ENVIRONMENT "LLVM_PROFILE_FILE=${TEST_TARGET}.profraw")
 
+    #
+    # test-conv1d-dw-c1
+
+    set(TEST_TARGET test-conv1d-dw-c1)
+    add_executable(${TEST_TARGET} ${TEST_TARGET}.cpp)
+    target_link_libraries(${TEST_TARGET} PRIVATE ggml)
+    add_test(NAME ${TEST_TARGET} COMMAND $<TARGET_FILE:${TEST_TARGET}>)
+    set_property(TEST ${TEST_TARGET} PROPERTY ENVIRONMENT "LLVM_PROFILE_FILE=${TEST_TARGET}.profraw")
+
+    #
+    # test-conv1d-dw-c2
+
+    set(TEST_TARGET test-conv1d-dw-c2)
+    add_executable(${TEST_TARGET} ${TEST_TARGET}.cpp)
+    target_link_libraries(${TEST_TARGET} PRIVATE ggml)
+    add_test(NAME ${TEST_TARGET} COMMAND $<TARGET_FILE:${TEST_TARGET}>)
+    set_property(TEST ${TEST_TARGET} PROPERTY ENVIRONMENT "LLVM_PROFILE_FILE=${TEST_TARGET}.profraw")
+
     #
     # test-conv2d
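
Since the two new binaries are registered with add_test, they should also be runnable through CTest in a standard CMake build of ggml, for example with ctest -R conv1d-dw from the build directory.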

tests/test-conv1d-dw-c1.cpp

Lines changed: 243 additions & 0 deletions
@@ -0,0 +1,243 @@
#include "ggml.h"
#include "ggml-cpu.h"
#include "ggml-alloc.h"
#include "ggml-backend.h"

#ifdef GGML_USE_CUDA
#include "ggml-cuda.h"
#endif

#ifdef GGML_USE_METAL
#include "ggml-metal.h"
#endif

#include <cassert>
#include <cmath>
#include <cstdio>
#include <cstring>
#include <fstream>
#include <map>
#include <string>
#include <vector>

static void ggml_log_callback_default(ggml_log_level level, const char * text, void * user_data) {
    (void) level;
    (void) user_data;
    fputs(text, stderr);
    fflush(stderr);
}

struct test_model {
    struct ggml_tensor * weight;
    struct ggml_tensor * input;
    ggml_backend_t backend = NULL;
    ggml_backend_buffer_t buffer;
    struct ggml_context * ctx;
};

void load_model(test_model & model, bool use_gpu = false) {
    // create data
    int K = 3, IC = 2, OC = 2; // OC == IC for a depthwise conv
    int IL = 6, N = 1;

    // initialize the weight data
    float weight_data[6] = {10.0f, 20.0f, 30.0f, 0.1f, 0.2f, 0.3f};

    // convert the weight data to fp16
    std::vector<ggml_fp16_t> h_weight_data(K * IC);
    ggml_fp32_to_fp16_row(weight_data, h_weight_data.data(), K * IC);

    // initialize the input data: 2 channels, 6 timesteps, 1 batch
    float input_data[12] = {
        1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
        1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
    };

    size_t buffer_size = 0;
    {
        buffer_size += K * IC * ggml_type_size(GGML_TYPE_F16);      // tensor weight
        buffer_size += IL * IC * N * ggml_type_size(GGML_TYPE_F32); // tensor input
        buffer_size += 1024;                                        // overhead
    }

    printf("%s: ggml tensor size    = %d bytes\n", __func__, (int) sizeof(ggml_tensor));
    printf("%s: backend buffer size = %0.2f MB\n", __func__, (buffer_size/ 1024.f/ 1024.f));

    ggml_log_set(ggml_log_callback_default, nullptr);

    int num_tensors = 2;
    struct ggml_init_params params {
        /*.mem_size   =*/ ggml_tensor_overhead() * num_tensors,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ true,
    };

    // initialize the backend
#ifdef GGML_USE_CUDA
    if (use_gpu) {
        fprintf(stderr, "%s: using CUDA backend\n", __func__);
        model.backend = ggml_backend_cuda_init(0);
        if (!model.backend) {
            fprintf(stderr, "%s: ggml_backend_cuda_init() failed\n", __func__);
        }
    }
#endif

#ifdef GGML_USE_METAL
    if (use_gpu) {
        fprintf(stderr, "%s: using Metal backend\n", __func__);
        model.backend = ggml_backend_metal_init();
        if (!model.backend) {
            fprintf(stderr, "%s: ggml_backend_metal_init() failed\n", __func__);
        }
    }
#endif

    if (!model.backend) {
        // fallback to CPU backend
        model.backend = ggml_backend_cpu_init();
    }

    model.buffer = ggml_backend_alloc_buffer(model.backend, buffer_size);

    // create context
    model.ctx = ggml_init(params);

    // create tensors
    // a PyTorch grouped Conv1d weight parameter has shape (out_channels, in_channels/groups, kernel_size)
    model.weight = ggml_new_tensor_3d(model.ctx, GGML_TYPE_F16, K, 1, IC);
    model.input  = ggml_new_tensor_3d(model.ctx, GGML_TYPE_F32, IL, IC, N);

    // create an allocator
    ggml_tallocr alloc = ggml_tallocr_new(model.buffer);

    // alloc memory
    ggml_tallocr_alloc(&alloc, model.weight);

    // load data to buffer
    if (ggml_backend_is_cpu(model.backend)) {
        memcpy(model.weight->data, h_weight_data.data(), ggml_nbytes(model.weight));
    } else {
        ggml_backend_tensor_set(model.weight, h_weight_data.data(), 0, ggml_nbytes(model.weight));
    }

    // alloc memory
    ggml_tallocr_alloc(&alloc, model.input);

    if (ggml_backend_is_cpu(model.backend)
#ifdef GGML_USE_METAL
        || ggml_backend_is_metal(model.backend)
#endif
    ) {
        memcpy(model.input->data, input_data, ggml_nbytes(model.input));
    } else {
        ggml_backend_tensor_set(model.input, input_data, 0, ggml_nbytes(model.input));
    }
}

struct ggml_cgraph * build_graph(const test_model& model) {
    static size_t buf_size = ggml_tensor_overhead()*GGML_DEFAULT_GRAPH_SIZE + ggml_graph_overhead();
    static std::vector<uint8_t> buf(buf_size);

    struct ggml_init_params params0 = {
        /*.mem_size   =*/ buf_size,
        /*.mem_buffer =*/ buf.data(),
        /*.no_alloc   =*/ true, // the tensors will be allocated later by ggml_gallocr_alloc_graph()
    };

    // create a temporary context to build the graph
    struct ggml_context * ctx0 = ggml_init(params0);

    struct ggml_cgraph * gf = ggml_new_graph(ctx0);

    int s0 = 1;
    int p0 = 1;
    int d0 = 1;

    struct ggml_tensor* conv1d_dw_res = ggml_conv_1d_dw(ctx0, model.weight, model.input, s0, p0, d0);
    ggml_set_name(conv1d_dw_res, "conv1d_dw_res");
    ggml_build_forward_expand(gf, conv1d_dw_res);

    // delete the temporary context used to build the graph
    ggml_free(ctx0);
    return gf;
}

struct ggml_cgraph* compute_graph(const test_model & model, ggml_gallocr_t allocr) {
    struct ggml_cgraph * gf = build_graph(model);

    // allocate tensors
    ggml_gallocr_alloc_graph(allocr, gf);
    int n_threads = 1;

    if (ggml_backend_is_cpu(model.backend)) {
        ggml_backend_cpu_set_n_threads(model.backend, n_threads);
    }

    ggml_backend_graph_compute(model.backend, gf);

    //ggml_graph_print(gf);

    return gf;
}

int main(void)
{
    ggml_time_init();

    test_model model;
    load_model(model, true);

    ggml_gallocr_t allocr = NULL;

    {
        allocr = ggml_gallocr_new(ggml_backend_get_default_buffer_type(model.backend));

        // create the worst-case graph for memory usage estimation
        struct ggml_cgraph * gf = build_graph(model);

        // compute the required memory
        ggml_gallocr_reserve(allocr, gf);
        size_t mem_size = ggml_gallocr_get_buffer_size(allocr, 0);
        fprintf(stderr, "%s: compute buffer size: %.2f MB\n", __func__, mem_size/1024.0f/1024.0f);
    }

    struct ggml_cgraph * gf_res = compute_graph(model, allocr);

    struct ggml_tensor * conv1d_dw_res = NULL;

    for (int i = 0; i < ggml_graph_n_nodes(gf_res); i++) {
        if (strcmp(ggml_get_name(ggml_graph_node(gf_res, i)), "conv1d_dw_res") == 0) {
            conv1d_dw_res = ggml_graph_node(gf_res, i);
        }
    }

    // guard against the node not being found before dereferencing it
    if (!conv1d_dw_res) {
        fprintf(stderr, "%s: failed to find conv1d_dw_res node\n", __func__);
        return 1;
    }

    std::vector<float> conv1d_dw_data(ggml_nelements(conv1d_dw_res));

    ggml_backend_tensor_get(conv1d_dw_res, conv1d_dw_data.data(), 0, ggml_nbytes(conv1d_dw_res));

    const int n_conv1d_dw_test = 12;

    float expected_conv1d_dw[n_conv1d_dw_test] = {
        50.0f, 60.0f, 60.0f, 60.0f, 60.0f, 30.0f, 0.50f, 0.60f, 0.60f, 0.60f, 0.60f, 0.30f
    };

    printf("\nPerforming test:\n");

    bool passed = true;
    for (int i = 0; i < n_conv1d_dw_test; i++) {
        if (std::abs(conv1d_dw_data[i] - expected_conv1d_dw[i]) > 1e-4) {
            passed = false;
            break;
        }
    }

    printf("ggml_conv1d_dw (%d): %s\n", (int) ggml_nelements(conv1d_dw_res), passed && (ggml_nelements(conv1d_dw_res) == n_conv1d_dw_test) ? "\033[32mPASSED\033[0m" : "\033[31mFAILED\033[0m");

    ggml_free(model.ctx);

    ggml_backend_buffer_free(model.buffer);
    ggml_backend_free(model.backend);
    ggml_gallocr_free(allocr);
    return 0;
}
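
As a quick sanity check on expected_conv1d_dw: with an all-ones input and p0 = 1, the first channel's kernel {10, 20, 30} sees only its last two taps at the left edge (20 + 30 = 50), all three taps at interior positions (10 + 20 + 30 = 60), and only its first two taps at the right edge (10 + 20 = 30); the second channel's kernel {0.1, 0.2, 0.3} follows the same pattern, giving 0.5, 0.6, 0.6, 0.6, 0.6, 0.3.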
