From e59047b188e13648e5caaa2e01cbc6f996bc8993 Mon Sep 17 00:00:00 2001 From: William Zijie Zhang Date: Tue, 21 Apr 2026 21:54:07 -0400 Subject: [PATCH 1/7] initial iteration for convolve atom --- include/atoms/affine.h | 8 + include/subexpr.h | 17 ++ src/atoms/affine/convolve.c | 274 ++++++++++++++++++++ tests/all_tests.c | 11 + tests/forward_pass/affine/test_convolve.h | 53 ++++ tests/jacobian_tests/affine/test_convolve.h | 68 +++++ tests/problem/test_param_broadcast.h | 65 +++++ tests/problem/test_param_prob.h | 92 +++++++ tests/wsum_hess/affine/test_convolve.h | 57 ++++ 9 files changed, 645 insertions(+) create mode 100644 src/atoms/affine/convolve.c create mode 100644 tests/forward_pass/affine/test_convolve.h create mode 100644 tests/jacobian_tests/affine/test_convolve.h create mode 100644 tests/wsum_hess/affine/test_convolve.h diff --git a/include/atoms/affine.h b/include/atoms/affine.h index 94afe8b..a66add6 100644 --- a/include/atoms/affine.h +++ b/include/atoms/affine.h @@ -65,4 +65,12 @@ expr *new_scalar_mult(expr *param_node, expr *child); * param_node */ expr *new_vector_mult(expr *param_node, expr *child); +/* 1D full convolution: y = conv(a, child) where a is the kernel held by + * param_node (length m) and child is a length-n vector (shape (n, 1) or + * (1, n)). Output is length (m + n - 1). param_node may be PARAM_FIXED + * or an updatable parameter; the kernel values refresh automatically on + * problem_update_params (same convention as new_vector_mult / + * new_scalar_mult). */ +expr *new_convolve(expr *param_node, expr *child); + #endif /* AFFINE_H */ diff --git a/include/subexpr.h b/include/subexpr.h index 9c57d23..b225c84 100644 --- a/include/subexpr.h +++ b/include/subexpr.h @@ -148,6 +148,23 @@ typedef struct vector_mult_expr expr *param_source; } vector_mult_expr; +/* 1D convolution: y = conv(a, child) where a is a length-m kernel held by + * param_source. Output has size (m + n - 1) where n is the child length. + * Forward and wsum_hess backprop are computed as direct loops; for Jacobian + * we materialize T(a) as a CSR once at jacobian_init and reuse the engine's + * block-left-mult machinery for composite children. */ +typedef struct convolve_expr +{ + expr base; + expr *param_source; /* length-m kernel */ + int m; /* kernel length */ + int n; /* input length */ + Matrix *T; /* (m+n-1) x n Toeplitz Sparse_Matrix */ + CSC_Matrix *Jchild_CSC; + CSC_Matrix *J_CSC; + int *csc_to_csr_work; +} convolve_expr; + /* Bivariate matrix multiplication: Z = f(u) @ g(u) where both children * may be composite expressions. */ typedef struct matmul_expr diff --git a/src/atoms/affine/convolve.c b/src/atoms/affine/convolve.c new file mode 100644 index 0000000..e6466d8 --- /dev/null +++ b/src/atoms/affine/convolve.c @@ -0,0 +1,274 @@ +/* + * Copyright 2026 Daniel Cederberg and William Zhang + * + * This file is part of the SparseDiffEngine project. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "atoms/affine.h" +#include "subexpr.h" +#include "utils/CSR_Matrix.h" +#include "utils/matrix.h" +#include "utils/tracked_alloc.h" +#include +#include +#include + +/* 1D full convolution: y = conv(a, child), where a is a length-m kernel + * held by param_source and child is a length-n vector. Output length is + * m + n - 1. In matrix form, y = T(a) @ child, where T(a) is the + * (m+n-1) x n Toeplitz matrix with T(a)[k, j] = a[k-j] when 0 <= k-j < m. + * + * Forward and Hessian backprop (w' = T(a)^T w) are direct loops, avoiding + * CSR traversal and AT materialization. Jacobian values go through the + * existing block-left-mult path so composite children work correctly. + * + * When param_source is updatable, forward() refreshes T(a)'s CSR values + * from the new kernel before running the convolution. The sparsity of + * T(a) is kernel-independent (staircase), so only values are rewritten. */ + +/* Fill T(a)'s CSR row pointers, column indices, and values from kernel a. + Row r holds T[r, col] = a[r - col] for col in + [max(0, r-m+1), min(n-1, r)]. */ +static void build_toeplitz(CSR_Matrix *T_csr, const double *a, int m, int n) +{ + int out_rows = m + n - 1; + int nnz = 0; + for (int r = 0; r < out_rows; r++) + { + T_csr->p[r] = nnz; + int col_lo = r - m + 1 > 0 ? r - m + 1 : 0; + int col_hi = r < n - 1 ? r : n - 1; + for (int col = col_lo; col <= col_hi; col++) + { + T_csr->i[nnz] = col; + T_csr->x[nnz] = a[r - col]; + nnz++; + } + } + T_csr->p[out_rows] = nnz; +} + +/* Overwrite T(a)'s CSR values (only) from the current kernel. Sparsity + pattern is unchanged, so we just walk the existing CSR entries. */ +static void refresh_toeplitz_values(convolve_expr *cnode) +{ + CSR_Matrix *T_csr = ((Sparse_Matrix *) cnode->T)->csr; + const double *a = cnode->param_source->value; + int out_rows = cnode->m + cnode->n - 1; + for (int r = 0; r < out_rows; r++) + { + for (int k = T_csr->p[r]; k < T_csr->p[r + 1]; k++) + { + T_csr->x[k] = a[r - T_csr->i[k]]; + } + } +} + +static void forward(expr *node, const double *u) +{ + expr *child = node->left; + convolve_expr *cnode = (convolve_expr *) node; + + if (cnode->base.needs_parameter_refresh) + { + cnode->param_source->forward(cnode->param_source, NULL); + /* T is NULL until jacobian_init_impl runs; forward can be called + first in standalone tests, in which case T is built later from + the already-refreshed param values. */ + if (cnode->T != NULL) + { + refresh_toeplitz_values(cnode); + } + cnode->base.needs_parameter_refresh = false; + } + + child->forward(child, u); + + const double *a = cnode->param_source->value; + const double *x = child->value; + double *y = node->value; + int m = cnode->m; + int n = cnode->n; + + memset(y, 0, node->size * sizeof(double)); + for (int j = 0; j < n; j++) + { + for (int i = 0; i < m; i++) + { + y[i + j] += a[i] * x[j]; + } + } +} + +static void jacobian_init_impl(expr *node) +{ + expr *child = node->left; + convolve_expr *cnode = (convolve_expr *) node; + int m = cnode->m; + int n = cnode->n; + const double *a = cnode->param_source->value; + + jacobian_init(child); + + /* Build T(a) as CSR: (m+n-1) x n, m*n nonzeros, staircase pattern. */ + int out_rows = m + n - 1; + CSR_Matrix *T_csr = new_csr_matrix(out_rows, n, m * n); + build_toeplitz(T_csr, a, m, n); + + cnode->T = new_sparse_matrix(T_csr); + free_csr_matrix(T_csr); + + /* Reuse left_matmul's sparsity precompute: J_node = T @ J_child via + block_left_mult with n_blocks = 1. */ + cnode->Jchild_CSC = csr_to_csc_alloc(child->jacobian, node->work->iwork); + cnode->J_CSC = + cnode->T->block_left_mult_sparsity(cnode->T, cnode->Jchild_CSC, 1); + node->jacobian = csc_to_csr_alloc(cnode->J_CSC, cnode->csc_to_csr_work); +} + +static void eval_jacobian(expr *node) +{ + expr *child = node->left; + convolve_expr *cnode = (convolve_expr *) node; + + child->eval_jacobian(child); + + /* T values have been refreshed by forward() if the kernel changed; here + we just need to refresh child's Jacobian and rerun the block matmul. */ + csr_to_csc_fill_values(child->jacobian, cnode->Jchild_CSC, node->work->iwork); + cnode->T->block_left_mult_values(cnode->T, cnode->Jchild_CSC, cnode->J_CSC); + csc_to_csr_fill_values(cnode->J_CSC, node->jacobian, cnode->csc_to_csr_work); +} + +static void wsum_hess_init_impl(expr *node) +{ + expr *child = node->left; + convolve_expr *cnode = (convolve_expr *) node; + + wsum_hess_init(child); + node->wsum_hess = new_csr_copy_sparsity(child->wsum_hess); + node->work->dwork = (double *) SP_MALLOC(cnode->n * sizeof(double)); +} + +static void eval_wsum_hess(expr *node, const double *w) +{ + expr *child = node->left; + convolve_expr *cnode = (convolve_expr *) node; + int m = cnode->m; + int n = cnode->n; + const double *a = cnode->param_source->value; + double *w_prime = node->work->dwork; + + /* w' = T(a)^T w. T(a)^T[j, k] = a[k-j] for j <= k < j+m, so + w'[j] = sum_{i=0..m-1} a[i] * w[i + j]. */ + memset(w_prime, 0, n * sizeof(double)); + for (int j = 0; j < n; j++) + { + for (int i = 0; i < m; i++) + { + w_prime[j] += a[i] * w[i + j]; + } + } + + child->eval_wsum_hess(child, w_prime); + memcpy(node->wsum_hess->x, child->wsum_hess->x, + child->wsum_hess->nnz * sizeof(double)); +} + +static bool is_affine(const expr *node) +{ + return node->left->is_affine(node->left); +} + +static void free_type_data(expr *node) +{ + convolve_expr *cnode = (convolve_expr *) node; + free_matrix(cnode->T); + free_csc_matrix(cnode->Jchild_CSC); + free_csc_matrix(cnode->J_CSC); + free(cnode->csc_to_csr_work); + if (cnode->param_source != NULL) + { + free_expr(cnode->param_source); + } + cnode->T = NULL; + cnode->Jchild_CSC = NULL; + cnode->J_CSC = NULL; + cnode->csc_to_csr_work = NULL; + cnode->param_source = NULL; +} + +expr *new_convolve(expr *param_node, expr *child) +{ + if (param_node == NULL) + { + fprintf(stderr, "Error in new_convolve: param_node must not be NULL\n"); + exit(1); + } + + int m = param_node->size; + int n; + int out_d1, out_d2; + + if (child->d2 == 1) + { + /* child is (n, 1) column vector */ + n = child->d1; + out_d1 = m + n - 1; + out_d2 = 1; + } + else if (child->d1 == 1) + { + /* child is (1, n) row vector */ + n = child->d2; + out_d1 = 1; + out_d2 = m + n - 1; + } + else + { + fprintf(stderr, "Error in new_convolve: child must be a vector " + "(shape (n, 1) or (1, n))\n"); + exit(1); + } + + if (m < 1 || n < 1) + { + fprintf(stderr, "Error in new_convolve: m and n must be >= 1\n"); + exit(1); + } + + convolve_expr *cnode = + (convolve_expr *) SP_CALLOC(1, sizeof(convolve_expr)); + expr *node = &cnode->base; + init_expr(node, out_d1, out_d2, child->n_vars, forward, jacobian_init_impl, + eval_jacobian, is_affine, wsum_hess_init_impl, eval_wsum_hess, + free_type_data); + node->left = child; + expr_retain(child); + + cnode->param_source = param_node; + expr_retain(param_node); + cnode->m = m; + cnode->n = n; + + /* iwork is used for csr_to_csc of child's Jacobian (size n_vars). */ + node->work->iwork = (int *) SP_MALLOC(node->n_vars * sizeof(int)); + cnode->csc_to_csr_work = (int *) SP_MALLOC(node->size * sizeof(int)); + + /* Ensure first forward() pulls current param values through any + broadcast/promote wrappers and reflects them in T (once T is built). */ + cnode->base.needs_parameter_refresh = true; + + return node; +} diff --git a/tests/all_tests.c b/tests/all_tests.c index efaad33..4af1dd4 100644 --- a/tests/all_tests.c +++ b/tests/all_tests.c @@ -7,6 +7,7 @@ #ifndef PROFILE_ONLY #include "forward_pass/affine/test_add.h" #include "forward_pass/affine/test_broadcast.h" +#include "forward_pass/affine/test_convolve.h" #include "forward_pass/affine/test_diag_mat.h" #include "forward_pass/affine/test_hstack.h" #include "forward_pass/affine/test_left_matmul_dense.h" @@ -25,6 +26,7 @@ #include "forward_pass/other/test_prod_axis_one.h" #include "forward_pass/other/test_prod_axis_zero.h" #include "jacobian_tests/affine/test_broadcast.h" +#include "jacobian_tests/affine/test_convolve.h" #include "jacobian_tests/affine/test_diag_mat.h" #include "jacobian_tests/affine/test_hstack.h" #include "jacobian_tests/affine/test_index.h" @@ -65,6 +67,7 @@ #include "utils/test_linalg_utils_matmul_chain_rule.h" #include "utils/test_matrix.h" #include "wsum_hess/affine/test_broadcast.h" +#include "wsum_hess/affine/test_convolve.h" #include "wsum_hess/affine/test_diag_mat.h" #include "wsum_hess/affine/test_hstack.h" #include "wsum_hess/affine/test_index.h" @@ -134,6 +137,8 @@ int main(void) mu_run_test(test_forward_prod_axis_one, tests_run); mu_run_test(test_matmul, tests_run); mu_run_test(test_left_matmul_dense, tests_run); + mu_run_test(test_convolve_forward, tests_run); + mu_run_test(test_convolve_forward_param, tests_run); mu_run_test(test_diag_mat_forward, tests_run); mu_run_test(test_upper_tri_forward_4x4, tests_run); @@ -215,6 +220,8 @@ int main(void) mu_run_test(test_jacobian_right_matmul_log, tests_run); mu_run_test(test_jacobian_right_matmul_log_vector, tests_run); mu_run_test(test_jacobian_matmul, tests_run); + mu_run_test(test_jacobian_convolve, tests_run); + mu_run_test(test_jacobian_convolve_composite, tests_run); mu_run_test(test_jacobian_transpose, tests_run); mu_run_test(test_diag_mat_jacobian_variable, tests_run); mu_run_test(test_diag_mat_jacobian_of_log, tests_run); @@ -280,6 +287,8 @@ int main(void) mu_run_test(test_wsum_hess_matmul_yx, tests_run); mu_run_test(test_wsum_hess_right_matmul, tests_run); mu_run_test(test_wsum_hess_right_matmul_vector, tests_run); + mu_run_test(test_wsum_hess_convolve, tests_run); + mu_run_test(test_wsum_hess_convolve_composite, tests_run); mu_run_test(test_wsum_hess_broadcast_row, tests_run); mu_run_test(test_wsum_hess_broadcast_col, tests_run); mu_run_test(test_wsum_hess_broadcast_scalar_to_matrix, tests_run); @@ -374,6 +383,7 @@ int main(void) mu_run_test(test_param_shared_left_matmul_problem, tests_run); mu_run_test(test_param_fixed_skip_in_update, tests_run); mu_run_test(test_param_scalar_mult_problem_with_constant, tests_run); + mu_run_test(test_param_convolve_problem, tests_run); printf("\n--- Parameter + Broadcast Tests ---\n"); mu_run_test(test_constant_broadcast_vector_mult, tests_run); @@ -384,6 +394,7 @@ int main(void) mu_run_test(test_param_sum_scalar_mult, tests_run); mu_run_test(test_const_hstack_left_matmul, tests_run); mu_run_test(test_param_hstack_left_matmul, tests_run); + mu_run_test(test_param_scalar_mult_convolve, tests_run); #endif /* PROFILE_ONLY */ #ifdef PROFILE_ONLY diff --git a/tests/forward_pass/affine/test_convolve.h b/tests/forward_pass/affine/test_convolve.h new file mode 100644 index 0000000..6a2cfd2 --- /dev/null +++ b/tests/forward_pass/affine/test_convolve.h @@ -0,0 +1,53 @@ +#include +#include + +#include "atoms/affine.h" +#include "expr.h" +#include "minunit.h" +#include "subexpr.h" +#include "test_helpers.h" + +const char *test_convolve_forward(void) +{ + /* Test: y = conv([1, 2, 3], x) where x = [1, 2, 3]. + * Expected y = [1, 4, 10, 12, 9]. */ + double kernel[3] = {1.0, 2.0, 3.0}; + expr *kernel_param = new_parameter(3, 1, PARAM_FIXED, 3, kernel); + expr *x = new_variable(3, 1, 0, 3); + expr *y = new_convolve(kernel_param, x); + + double u[3] = {1.0, 2.0, 3.0}; + y->forward(y, u); + + double expected[5] = {1.0, 4.0, 10.0, 12.0, 9.0}; + + mu_assert("convolve result should have d1=5", y->d1 == 5); + mu_assert("convolve result should have d2=1", y->d2 == 1); + mu_assert("convolve result should have size=5", y->size == 5); + mu_assert("Convolve forward pass test failed", + cmp_double_array(y->value, expected, 5)); + + free_expr(y); + return 0; +} + +const char *test_convolve_forward_param(void) +{ + /* Same as test_convolve_forward, but the kernel is an updatable parameter + * (param_id = 0). Exercises the needs_parameter_refresh path in forward(). */ + double kernel[3] = {1.0, 2.0, 3.0}; + expr *kernel_param = new_parameter(3, 1, 0, 3, kernel); + expr *x = new_variable(3, 1, 0, 3); + expr *y = new_convolve(kernel_param, x); + + double u[3] = {1.0, 2.0, 3.0}; + y->forward(y, u); + + double expected[5] = {1.0, 4.0, 10.0, 12.0, 9.0}; + + mu_assert("Convolve forward pass with param kernel failed", + cmp_double_array(y->value, expected, 5)); + + free_expr(y); + return 0; +} diff --git a/tests/jacobian_tests/affine/test_convolve.h b/tests/jacobian_tests/affine/test_convolve.h new file mode 100644 index 0000000..c3aae98 --- /dev/null +++ b/tests/jacobian_tests/affine/test_convolve.h @@ -0,0 +1,68 @@ +#include +#include + +#include "atoms/affine.h" +#include "atoms/elementwise_restricted_dom.h" +#include "expr.h" +#include "minunit.h" +#include "numerical_diff.h" +#include "subexpr.h" +#include "test_helpers.h" + +const char *test_jacobian_convolve(void) +{ + /* Test: Jacobian of y = conv([1, 2, 3], x) for x = Variable(3). + * J should equal T(kernel): + * [[1, 0, 0], + * [2, 1, 0], + * [3, 2, 1], + * [0, 3, 2], + * [0, 0, 3]] + * stored in CSR with nnz = 9, shape 5 x 3. */ + double kernel[3] = {1.0, 2.0, 3.0}; + expr *kernel_param = new_parameter(3, 1, PARAM_FIXED, 3, kernel); + expr *x = new_variable(3, 1, 0, 3); + expr *y = new_convolve(kernel_param, x); + + double u[3] = {1.0, 2.0, 3.0}; + y->forward(y, u); + jacobian_init(y); + y->eval_jacobian(y); + + mu_assert("Jacobian should have 5 rows", y->jacobian->m == 5); + mu_assert("Jacobian should have 3 columns", y->jacobian->n == 3); + mu_assert("Jacobian should have 9 nonzeros", y->jacobian->nnz == 9); + + int expected_p[6] = {0, 1, 3, 6, 8, 9}; + int expected_i[9] = {0, 0, 1, 0, 1, 2, 1, 2, 2}; + double expected_x[9] = {1.0, 2.0, 1.0, 3.0, 2.0, 1.0, 3.0, 2.0, 3.0}; + + mu_assert("Convolve Jacobian row pointers incorrect", + cmp_int_array(y->jacobian->p, expected_p, 6)); + mu_assert("Convolve Jacobian column indices incorrect", + cmp_int_array(y->jacobian->i, expected_i, 9)); + mu_assert("Convolve Jacobian values incorrect", + cmp_double_array(y->jacobian->x, expected_x, 9)); + + free_expr(y); + return 0; +} + +const char *test_jacobian_convolve_composite(void) +{ + /* y = conv([1, 2, 3], log(x)) — verify via numerical differentiation that + * the composite path (T(a) @ J_log) matches. */ + double kernel[3] = {1.0, 2.0, 3.0}; + double x_vals[3] = {1.0, 2.0, 3.0}; + + expr *kernel_param = new_parameter(3, 1, PARAM_FIXED, 3, kernel); + expr *x = new_variable(3, 1, 0, 3); + expr *log_x = new_log(x); + expr *y = new_convolve(kernel_param, log_x); + + mu_assert("check_jacobian failed", + check_jacobian_num(y, x_vals, NUMERICAL_DIFF_DEFAULT_H)); + + free_expr(y); + return 0; +} diff --git a/tests/problem/test_param_broadcast.h b/tests/problem/test_param_broadcast.h index 032e8eb..d9b43f1 100644 --- a/tests/problem/test_param_broadcast.h +++ b/tests/problem/test_param_broadcast.h @@ -341,4 +341,69 @@ const char *test_param_hstack_left_matmul(void) return 0; } +const char *test_param_scalar_mult_convolve(void) +{ + int n = 3; + + /* minimize sum(x) subject to conv(s * kernel, x), where kernel is a fixed + length-4 vector and s is an updatable scalar parameter. Scaling s + rescales the entire kernel, so Jacobian and constraint values scale + linearly with s. */ + expr *x = new_variable(3, 1, 0, n); + expr *objective = new_sum(x, -1); + + double kernel_vals[4] = {1.0, 2.0, 3.0, 4.0}; + expr *kernel = new_parameter(4, 1, PARAM_FIXED, n, kernel_vals); + double s_vals[1] = {1.0}; + expr *s = new_parameter(1, 1, 0, n, s_vals); + expr *scaled_kernel = new_scalar_mult(s, kernel); + + expr *constraint = new_convolve(scaled_kernel, x); + expr *constraints[1] = {constraint}; + problem *prob = new_problem(objective, constraints, 1, false); + + expr *param_nodes[1] = {s}; + problem_register_params(prob, param_nodes, 1); + problem_init_derivatives(prob); + + /* point for evaluating and sparsity arrays (T is 6x3 with 12 nonzeros) */ + double x_vals[3] = {1.0, 2.0, 3.0}; + int Ap[7] = {0, 1, 3, 6, 9, 11, 12}; + int Ai[12] = {0, 0, 1, 0, 1, 2, 0, 1, 2, 1, 2, 2}; + + /* test 1: s = 1, effective kernel = [1, 2, 3, 4] */ + problem_constraint_forward(prob, x_vals); + problem_jacobian(prob); + double constrs[6] = {1.0, 4.0, 10.0, 16.0, 17.0, 12.0}; + double Ax[12] = {1.0, 2.0, 1.0, 3.0, 2.0, 1.0, + 4.0, 3.0, 2.0, 4.0, 3.0, 4.0}; + mu_assert("vals fail", cmp_double_array(prob->constraint_values, constrs, 6)); + mu_assert("rows fail", cmp_int_array(prob->jacobian->p, Ap, 7)); + mu_assert("cols fail", cmp_int_array(prob->jacobian->i, Ai, 12)); + mu_assert("vals fail", cmp_double_array(prob->jacobian->x, Ax, 12)); + + mu_assert("check_jacobian failed", + check_jacobian_num(constraint, x_vals, NUMERICAL_DIFF_DEFAULT_H)); + + /* test 2: s = 10 via problem_update_params, effective kernel = [10,20,30,40] */ + double theta[1] = {10.0}; + problem_update_params(prob, theta); + problem_constraint_forward(prob, x_vals); + problem_jacobian(prob); + double updated_constrs[6] = {10.0, 40.0, 100.0, 160.0, 170.0, 120.0}; + double updated_Ax[12] = {10.0, 20.0, 10.0, 30.0, 20.0, 10.0, + 40.0, 30.0, 20.0, 40.0, 30.0, 40.0}; + mu_assert("vals fail", + cmp_double_array(prob->constraint_values, updated_constrs, 6)); + mu_assert("rows fail", cmp_int_array(prob->jacobian->p, Ap, 7)); + mu_assert("cols fail", cmp_int_array(prob->jacobian->i, Ai, 12)); + mu_assert("vals fail", cmp_double_array(prob->jacobian->x, updated_Ax, 12)); + + mu_assert("check_jacobian failed", + check_jacobian_num(constraint, x_vals, NUMERICAL_DIFF_DEFAULT_H)); + + free_problem(prob); + return 0; +} + #endif /* TEST_PARAM_BROADCAST_H */ diff --git a/tests/problem/test_param_prob.h b/tests/problem/test_param_prob.h index 40350d6..e3022a5 100644 --- a/tests/problem/test_param_prob.h +++ b/tests/problem/test_param_prob.h @@ -6,6 +6,7 @@ #include #include "atoms/affine.h" +#include "atoms/elementwise_full_dom.h" #include "atoms/elementwise_restricted_dom.h" #include "expr.h" #include "minunit.h" @@ -535,4 +536,95 @@ const char *test_param_shared_left_matmul_problem(void) return 0; } +const char *test_param_convolve_problem(void) +{ + int n = 3; + + /* minimize sum(x) subject to conv(a, exp(x)), with kernel a as parameter. + The exp nonlinearity gives a non-zero Hessian that depends on the kernel, + so both Jacobian and Hessian refresh paths get exercised on + problem_update_params. At x = [0, 0, 0] we have exp(x) = 1, so J = T + and H diagonal = T^T @ 1_5 = sum(a) per column. */ + expr *x = new_variable(3, 1, 0, n); + expr *exp_x = new_exp(x); + expr *objective = new_sum(x, -1); + double theta[3] = {1.0, 2.0, 3.0}; + expr *a_param = new_parameter(3, 1, 0, n, theta); + expr *constraint = new_convolve(a_param, exp_x); + expr *constraints[1] = {constraint}; + problem *prob = new_problem(objective, constraints, 1, true); + + /* register parameters and fill sparsity patterns */ + expr *param_nodes[1] = {a_param}; + problem_register_params(prob, param_nodes, 1); + problem_init_derivatives(prob); + + /* point for evaluating and constant sparsity arrays */ + double x_vals[3] = {0.0, 0.0, 0.0}; + double w[5] = {1.0, 1.0, 1.0, 1.0, 1.0}; + int Ap[6] = {0, 1, 3, 6, 8, 9}; + int Ai[9] = {0, 0, 1, 0, 1, 2, 1, 2, 2}; + int Hp[4] = {0, 1, 2, 3}; + int Hi[3] = {0, 1, 2}; + + /* test 1: kernel a = [1, 2, 3] */ + problem_constraint_forward(prob, x_vals); + problem_jacobian(prob); + problem_hessian(prob, 0.0, w); + double constrs[5] = {1.0, 3.0, 6.0, 5.0, 3.0}; + double Ax[9] = {1.0, 2.0, 1.0, 3.0, 2.0, 1.0, 3.0, 2.0, 3.0}; + double Hx[3] = {6.0, 6.0, 6.0}; + mu_assert("vals fail", cmp_double_array(prob->constraint_values, constrs, 5)); + mu_assert("rows fail", cmp_int_array(prob->jacobian->p, Ap, 6)); + mu_assert("cols fail", cmp_int_array(prob->jacobian->i, Ai, 9)); + mu_assert("vals fail", cmp_double_array(prob->jacobian->x, Ax, 9)); + mu_assert("hess rows fail", + cmp_int_array(prob->lagrange_hessian->p, Hp, 4)); + mu_assert("hess cols fail", + cmp_int_array(prob->lagrange_hessian->i, Hi, 3)); + mu_assert("hess vals fail", + cmp_double_array(prob->lagrange_hessian->x, Hx, 3)); + + /* test 2: kernel a = [4, 5, 6] — refresh path must rebuild T(a) values + and propagate to both Jacobian and Hessian. */ + theta[0] = 4.0; + theta[1] = 5.0; + theta[2] = 6.0; + problem_update_params(prob, theta); + problem_constraint_forward(prob, x_vals); + problem_jacobian(prob); + problem_hessian(prob, 0.0, w); + constrs[0] = 4.0; + constrs[1] = 9.0; + constrs[2] = 15.0; + constrs[3] = 11.0; + constrs[4] = 6.0; + Ax[0] = 4.0; + Ax[1] = 5.0; + Ax[2] = 4.0; + Ax[3] = 6.0; + Ax[4] = 5.0; + Ax[5] = 4.0; + Ax[6] = 6.0; + Ax[7] = 5.0; + Ax[8] = 6.0; + Hx[0] = 15.0; + Hx[1] = 15.0; + Hx[2] = 15.0; + mu_assert("vals fail", cmp_double_array(prob->constraint_values, constrs, 5)); + mu_assert("rows fail", cmp_int_array(prob->jacobian->p, Ap, 6)); + mu_assert("cols fail", cmp_int_array(prob->jacobian->i, Ai, 9)); + mu_assert("vals fail", cmp_double_array(prob->jacobian->x, Ax, 9)); + mu_assert("hess rows fail", + cmp_int_array(prob->lagrange_hessian->p, Hp, 4)); + mu_assert("hess cols fail", + cmp_int_array(prob->lagrange_hessian->i, Hi, 3)); + mu_assert("hess vals fail", + cmp_double_array(prob->lagrange_hessian->x, Hx, 3)); + + free_problem(prob); + + return 0; +} + #endif /* TEST_PARAM_PROB_H */ diff --git a/tests/wsum_hess/affine/test_convolve.h b/tests/wsum_hess/affine/test_convolve.h new file mode 100644 index 0000000..98dd7ab --- /dev/null +++ b/tests/wsum_hess/affine/test_convolve.h @@ -0,0 +1,57 @@ +#include + +#include "atoms/affine.h" +#include "atoms/elementwise_restricted_dom.h" +#include "expr.h" +#include "minunit.h" +#include "numerical_diff.h" +#include "subexpr.h" +#include "test_helpers.h" + +const char *test_wsum_hess_convolve(void) +{ + /* y = conv([1, 2, 3], x) is linear in x when the kernel is constant. + * Child x is a leaf variable (zero Hessian), so y's weighted Hessian + * should be zero for any weight vector w. */ + double kernel[3] = {1.0, 2.0, 3.0}; + expr *kernel_param = new_parameter(3, 1, PARAM_FIXED, 3, kernel); + expr *x = new_variable(3, 1, 0, 3); + expr *y = new_convolve(kernel_param, x); + + double u[3] = {1.0, 2.0, 3.0}; + double w[5] = {1.0, -1.0, 2.0, 3.0, -2.0}; + + y->forward(y, u); + jacobian_init(y); + wsum_hess_init(y); + y->eval_wsum_hess(y, w); + + mu_assert("Convolve wsum_hess should be 3x3", y->wsum_hess->m == 3); + mu_assert("Convolve wsum_hess should be square", y->wsum_hess->n == 3); + mu_assert("Convolve wsum_hess should have zero nonzeros", + y->wsum_hess->nnz == 0); + + free_expr(y); + return 0; +} + +const char *test_wsum_hess_convolve_composite(void) +{ + /* y = conv([1, 2, 3], log(x)) — nonlinear in x, so backprop of weights + * through the Toeplitz should produce the same Hessian as a numerical + * second-derivative check. */ + double kernel[3] = {1.0, 2.0, 3.0}; + double x_vals[3] = {1.0, 2.0, 3.0}; + double w[5] = {1.0, -1.0, 2.0, 3.0, -2.0}; + + expr *kernel_param = new_parameter(3, 1, PARAM_FIXED, 3, kernel); + expr *x = new_variable(3, 1, 0, 3); + expr *log_x = new_log(x); + expr *y = new_convolve(kernel_param, log_x); + + mu_assert("check_wsum_hess failed", + check_wsum_hess(y, x_vals, w, NUMERICAL_DIFF_DEFAULT_H)); + + free_expr(y); + return 0; +} From d2cd16a704ca3996c2e32a0e79ad1138bbd1586f Mon Sep 17 00:00:00 2001 From: William Zijie Zhang Date: Tue, 21 Apr 2026 22:14:28 -0400 Subject: [PATCH 2/7] apply formatter --- include/subexpr.h | 8 ++++---- src/atoms/affine/convolve.c | 3 +-- tests/problem/test_param_broadcast.h | 3 +-- tests/problem/test_param_prob.h | 18 ++++++------------ 4 files changed, 12 insertions(+), 20 deletions(-) diff --git a/include/subexpr.h b/include/subexpr.h index b225c84..46bbd57 100644 --- a/include/subexpr.h +++ b/include/subexpr.h @@ -156,10 +156,10 @@ typedef struct vector_mult_expr typedef struct convolve_expr { expr base; - expr *param_source; /* length-m kernel */ - int m; /* kernel length */ - int n; /* input length */ - Matrix *T; /* (m+n-1) x n Toeplitz Sparse_Matrix */ + expr *param_source; /* length-m kernel */ + int m; /* kernel length */ + int n; /* input length */ + Matrix *T; /* (m+n-1) x n Toeplitz Sparse_Matrix */ CSC_Matrix *Jchild_CSC; CSC_Matrix *J_CSC; int *csc_to_csr_work; diff --git a/src/atoms/affine/convolve.c b/src/atoms/affine/convolve.c index e6466d8..133fd67 100644 --- a/src/atoms/affine/convolve.c +++ b/src/atoms/affine/convolve.c @@ -248,8 +248,7 @@ expr *new_convolve(expr *param_node, expr *child) exit(1); } - convolve_expr *cnode = - (convolve_expr *) SP_CALLOC(1, sizeof(convolve_expr)); + convolve_expr *cnode = (convolve_expr *) SP_CALLOC(1, sizeof(convolve_expr)); expr *node = &cnode->base; init_expr(node, out_d1, out_d2, child->n_vars, forward, jacobian_init_impl, eval_jacobian, is_affine, wsum_hess_init_impl, eval_wsum_hess, diff --git a/tests/problem/test_param_broadcast.h b/tests/problem/test_param_broadcast.h index d9b43f1..53e4456 100644 --- a/tests/problem/test_param_broadcast.h +++ b/tests/problem/test_param_broadcast.h @@ -375,8 +375,7 @@ const char *test_param_scalar_mult_convolve(void) problem_constraint_forward(prob, x_vals); problem_jacobian(prob); double constrs[6] = {1.0, 4.0, 10.0, 16.0, 17.0, 12.0}; - double Ax[12] = {1.0, 2.0, 1.0, 3.0, 2.0, 1.0, - 4.0, 3.0, 2.0, 4.0, 3.0, 4.0}; + double Ax[12] = {1.0, 2.0, 1.0, 3.0, 2.0, 1.0, 4.0, 3.0, 2.0, 4.0, 3.0, 4.0}; mu_assert("vals fail", cmp_double_array(prob->constraint_values, constrs, 6)); mu_assert("rows fail", cmp_int_array(prob->jacobian->p, Ap, 7)); mu_assert("cols fail", cmp_int_array(prob->jacobian->i, Ai, 12)); diff --git a/tests/problem/test_param_prob.h b/tests/problem/test_param_prob.h index e3022a5..02c1a67 100644 --- a/tests/problem/test_param_prob.h +++ b/tests/problem/test_param_prob.h @@ -578,12 +578,9 @@ const char *test_param_convolve_problem(void) mu_assert("rows fail", cmp_int_array(prob->jacobian->p, Ap, 6)); mu_assert("cols fail", cmp_int_array(prob->jacobian->i, Ai, 9)); mu_assert("vals fail", cmp_double_array(prob->jacobian->x, Ax, 9)); - mu_assert("hess rows fail", - cmp_int_array(prob->lagrange_hessian->p, Hp, 4)); - mu_assert("hess cols fail", - cmp_int_array(prob->lagrange_hessian->i, Hi, 3)); - mu_assert("hess vals fail", - cmp_double_array(prob->lagrange_hessian->x, Hx, 3)); + mu_assert("hess rows fail", cmp_int_array(prob->lagrange_hessian->p, Hp, 4)); + mu_assert("hess cols fail", cmp_int_array(prob->lagrange_hessian->i, Hi, 3)); + mu_assert("hess vals fail", cmp_double_array(prob->lagrange_hessian->x, Hx, 3)); /* test 2: kernel a = [4, 5, 6] — refresh path must rebuild T(a) values and propagate to both Jacobian and Hessian. */ @@ -615,12 +612,9 @@ const char *test_param_convolve_problem(void) mu_assert("rows fail", cmp_int_array(prob->jacobian->p, Ap, 6)); mu_assert("cols fail", cmp_int_array(prob->jacobian->i, Ai, 9)); mu_assert("vals fail", cmp_double_array(prob->jacobian->x, Ax, 9)); - mu_assert("hess rows fail", - cmp_int_array(prob->lagrange_hessian->p, Hp, 4)); - mu_assert("hess cols fail", - cmp_int_array(prob->lagrange_hessian->i, Hi, 3)); - mu_assert("hess vals fail", - cmp_double_array(prob->lagrange_hessian->x, Hx, 3)); + mu_assert("hess rows fail", cmp_int_array(prob->lagrange_hessian->p, Hp, 4)); + mu_assert("hess cols fail", cmp_int_array(prob->lagrange_hessian->i, Hi, 3)); + mu_assert("hess vals fail", cmp_double_array(prob->lagrange_hessian->x, Hx, 3)); free_problem(prob); From b8dbf3d3fc0c31fbdec6b1ae89f88ba436fe506f Mon Sep 17 00:00:00 2001 From: William Zijie Zhang Date: Tue, 21 Apr 2026 23:03:55 -0400 Subject: [PATCH 3/7] adds test for row convolve in case it is 1d in python --- src/atoms/affine/convolve.c | 36 ++++++++--------------- tests/all_tests.c | 1 + tests/forward_pass/affine/test_convolve.h | 24 +++++++++++++++ 3 files changed, 37 insertions(+), 24 deletions(-) diff --git a/src/atoms/affine/convolve.c b/src/atoms/affine/convolve.c index 133fd67..51b80dc 100644 --- a/src/atoms/affine/convolve.c +++ b/src/atoms/affine/convolve.c @@ -59,8 +59,9 @@ static void build_toeplitz(CSR_Matrix *T_csr, const double *a, int m, int n) T_csr->p[out_rows] = nnz; } -/* Overwrite T(a)'s CSR values (only) from the current kernel. Sparsity - pattern is unchanged, so we just walk the existing CSR entries. */ +/* Values-only refresh analogous to refresh_dense_left in left_matmul.c: + sparsity is kernel-independent so p/i stay intact; we just walk the + existing CSR entries and overwrite x from the current kernel. */ static void refresh_toeplitz_values(convolve_expr *cnode) { CSR_Matrix *T_csr = ((Sparse_Matrix *) cnode->T)->csr; @@ -172,13 +173,14 @@ static void eval_wsum_hess(expr *node, const double *w) /* w' = T(a)^T w. T(a)^T[j, k] = a[k-j] for j <= k < j+m, so w'[j] = sum_{i=0..m-1} a[i] * w[i + j]. */ - memset(w_prime, 0, n * sizeof(double)); for (int j = 0; j < n; j++) { + double sum = 0.0; for (int i = 0; i < m; i++) { - w_prime[j] += a[i] * w[i + j]; + sum += a[i] * w[i + j]; } + w_prime[j] = sum; } child->eval_wsum_hess(child, w_prime); @@ -198,39 +200,25 @@ static void free_type_data(expr *node) free_csc_matrix(cnode->Jchild_CSC); free_csc_matrix(cnode->J_CSC); free(cnode->csc_to_csr_work); - if (cnode->param_source != NULL) - { - free_expr(cnode->param_source); - } - cnode->T = NULL; - cnode->Jchild_CSC = NULL; - cnode->J_CSC = NULL; - cnode->csc_to_csr_work = NULL; - cnode->param_source = NULL; + free_expr(cnode->param_source); } expr *new_convolve(expr *param_node, expr *child) { - if (param_node == NULL) - { - fprintf(stderr, "Error in new_convolve: param_node must not be NULL\n"); - exit(1); - } - + /* Accept both (n, 1) column and (1, n) row shapes — numpy-style 1D + arrays reach us as (1, n) through the Python bindings, and the math + is shape-agnostic (flat buffers of size m + n - 1). Output shape + matches the input orientation. */ int m = param_node->size; - int n; - int out_d1, out_d2; - + int n, out_d1, out_d2; if (child->d2 == 1) { - /* child is (n, 1) column vector */ n = child->d1; out_d1 = m + n - 1; out_d2 = 1; } else if (child->d1 == 1) { - /* child is (1, n) row vector */ n = child->d2; out_d1 = 1; out_d2 = m + n - 1; diff --git a/tests/all_tests.c b/tests/all_tests.c index 4af1dd4..76807b6 100644 --- a/tests/all_tests.c +++ b/tests/all_tests.c @@ -138,6 +138,7 @@ int main(void) mu_run_test(test_matmul, tests_run); mu_run_test(test_left_matmul_dense, tests_run); mu_run_test(test_convolve_forward, tests_run); + mu_run_test(test_convolve_forward_row, tests_run); mu_run_test(test_convolve_forward_param, tests_run); mu_run_test(test_diag_mat_forward, tests_run); mu_run_test(test_upper_tri_forward_4x4, tests_run); diff --git a/tests/forward_pass/affine/test_convolve.h b/tests/forward_pass/affine/test_convolve.h index 6a2cfd2..14d5988 100644 --- a/tests/forward_pass/affine/test_convolve.h +++ b/tests/forward_pass/affine/test_convolve.h @@ -31,6 +31,30 @@ const char *test_convolve_forward(void) return 0; } +const char *test_convolve_forward_row(void) +{ + /* Same as test_convolve_forward but x is a (1, 3) row vector. + * Output orientation should match: (1, 5). */ + double kernel[3] = {1.0, 2.0, 3.0}; + expr *kernel_param = new_parameter(3, 1, PARAM_FIXED, 3, kernel); + expr *x = new_variable(1, 3, 0, 3); + expr *y = new_convolve(kernel_param, x); + + double u[3] = {1.0, 2.0, 3.0}; + y->forward(y, u); + + double expected[5] = {1.0, 4.0, 10.0, 12.0, 9.0}; + + mu_assert("convolve row result should have d1=1", y->d1 == 1); + mu_assert("convolve row result should have d2=5", y->d2 == 5); + mu_assert("convolve row result should have size=5", y->size == 5); + mu_assert("Convolve forward pass (row vector) failed", + cmp_double_array(y->value, expected, 5)); + + free_expr(y); + return 0; +} + const char *test_convolve_forward_param(void) { /* Same as test_convolve_forward, but the kernel is an updatable parameter From c3d3dfeaebb2ce20eea7a573d8482317912fac74 Mon Sep 17 00:00:00 2001 From: dance858 Date: Thu, 23 Apr 2026 09:20:35 -0700 Subject: [PATCH 4/7] improve convention of data pointer when we have parameters for left and right matmul --- include/atoms/affine.h | 32 ++++++++++++++--------- include/utils/dense_matrix.h | 3 ++- src/atoms/affine/left_matmul.c | 26 ++++++++++++++++--- src/atoms/affine/right_matmul.c | 30 +++++++++++++-------- src/utils/dense_matrix.c | 5 +++- tests/problem/test_param_broadcast.h | 8 ++---- tests/problem/test_param_prob.h | 39 +++++++++++++--------------- 7 files changed, 88 insertions(+), 55 deletions(-) diff --git a/include/atoms/affine.h b/include/atoms/affine.h index a66add6..9527d3c 100644 --- a/include/atoms/affine.h +++ b/include/atoms/affine.h @@ -42,19 +42,31 @@ expr *new_diag_mat(expr *child); expr *new_upper_tri(expr *child); expr *new_transpose(expr *child); -/* Left matrix multiplication: A @ f(x) where A is a constant or parameter - * sparse matrix. param_node is NULL for fixed constants. */ +/* Left matrix multiplication: A @ f(x) where A is a constant sparse matrix. + param_node is NULL for fixed constants. We currently do not support sparse + parameters, so param_node should always be null. */ expr *new_left_matmul(expr *param_node, expr *u, const CSR_Matrix *A); -/* Left matrix multiplication: A @ f(x) where A is a constant or parameter - * dense matrix (row-major, m x n). Uses CBLAS for efficient computation. */ +/* Left matrix multiplication: A @ f(x) where A is a constant dense matrix + (in row-major, m x n, with values given by 'data') or a parameter + representing a dense m x n matrix in row-major. + + The 'data' pointer only represents the values of the dense matrix if + param_node is null. If param_node is not null, data must be null. */ expr *new_left_matmul_dense(expr *param_node, expr *u, int m, int n, const double *data); -/* Right matrix multiplication: f(x) @ A where A is a constant or parameter - * matrix. */ +/* Right matrix multiplication: f(x) @ A where A is a constant sparse matrix. + We currently do not support sparse parameters, so param_node should always be + null. */ expr *new_right_matmul(expr *param_node, expr *u, const CSR_Matrix *A); +/* Right matrix multiplication: f(x) @ A where A is a constant dense matrix + (in row-major, m x n, with values given by 'data') or a parameter + representing a dense m x n matrix in row-major. + + The 'data' pointer only represents the values of the dense matrix if + param_node is null. If param_node is not null, data must be null. */ expr *new_right_matmul_dense(expr *param_node, expr *u, int m, int n, const double *data); @@ -65,12 +77,8 @@ expr *new_scalar_mult(expr *param_node, expr *child); * param_node */ expr *new_vector_mult(expr *param_node, expr *child); -/* 1D full convolution: y = conv(a, child) where a is the kernel held by - * param_node (length m) and child is a length-n vector (shape (n, 1) or - * (1, n)). Output is length (m + n - 1). param_node may be PARAM_FIXED - * or an updatable parameter; the kernel values refresh automatically on - * problem_update_params (same convention as new_vector_mult / - * new_scalar_mult). */ +/* 1D full convolution: y = conv(param_node, child) where param_node is the + kernel and may either represent a constant or an updatable parameter */ expr *new_convolve(expr *param_node, expr *child); #endif /* AFFINE_H */ diff --git a/include/utils/dense_matrix.h b/include/utils/dense_matrix.h index ab3aea0..bd1693b 100644 --- a/include/utils/dense_matrix.h +++ b/include/utils/dense_matrix.h @@ -28,7 +28,8 @@ typedef struct Dense_Matrix double *work; /* scratch buffer, length n */ } Dense_Matrix; -/* Constructors */ +/* Constructors. If data is NULL, the value buffer is allocated but left + uninitialized; otherwise m*n entries are copied from data. */ Matrix *new_dense_matrix(int m, int n, const double *data); /* Transpose helper */ diff --git a/src/atoms/affine/left_matmul.c b/src/atoms/affine/left_matmul.c index 2928d23..d5d87a1 100644 --- a/src/atoms/affine/left_matmul.c +++ b/src/atoms/affine/left_matmul.c @@ -276,15 +276,35 @@ expr *new_left_matmul_dense(expr *param_node, expr *u, int m, int n, lnode->csc_to_csr_work = (int *) SP_MALLOC(node->size * sizeof(int)); lnode->n_blocks = n_blocks; - lnode->A = new_dense_matrix(m, n, data); - lnode->AT = dense_matrix_trans((const Dense_Matrix *) lnode->A); - /* parameter support */ lnode->param_source = param_node; if (param_node != NULL) { + if (data != NULL) + { + fprintf(stderr, "Error in new_left_matmul_dense with ptr convention \n"); + exit(1); + } + expr_retain(param_node); lnode->refresh_param_values = refresh_dense_left; + + /* A and AT buffers are filled by refresh_dense_left from the parameter. */ + lnode->A = new_dense_matrix(m, n, NULL); + lnode->AT = new_dense_matrix(n, m, NULL); + node->needs_parameter_refresh = true; + } + /* constant matrix and param_source should be null */ + else + { + if (data == NULL) + { + fprintf(stderr, "Error in new_left_matmul_dense with ptr convention \n"); + exit(1); + } + + lnode->A = new_dense_matrix(m, n, data); + lnode->AT = dense_matrix_trans((const Dense_Matrix *) lnode->A); } return node; diff --git a/src/atoms/affine/right_matmul.c b/src/atoms/affine/right_matmul.c index 56075dc..c3344f2 100644 --- a/src/atoms/affine/right_matmul.c +++ b/src/atoms/affine/right_matmul.c @@ -82,24 +82,32 @@ expr *new_right_matmul(expr *param_node, expr *u, const CSR_Matrix *A) expr *new_right_matmul_dense(expr *param_node, expr *u, int m, int n, const double *data) { - /* We express: u @ A = (A^T @ u^T)^T. A is m x n, so A^T is n x m. */ - double *AT = (double *) SP_MALLOC(n * m * sizeof(double)); - A_transpose(AT, data, m, n); + if ((param_node != NULL && data != NULL) || (param_node == NULL && data == NULL)) + { + fprintf(stderr, "Error in new_right_matmul_dense with ptr convention \n"); + exit(1); + } + /* We express: u @ A = (A^T @ u^T)^T. A is m x n, so A^T is n x m. */ expr *u_transpose = new_transpose(u); - expr *left_matmul_node = new_left_matmul_dense(NULL, u_transpose, n, m, AT); + expr *left_matmul_node; - /* If parameterized, attach param_source and custom refresh */ if (param_node != NULL) { + left_matmul_node = + new_left_matmul_dense(param_node, u_transpose, n, m, NULL); left_matmul_expr *lnode = (left_matmul_expr *) left_matmul_node; - lnode->param_source = param_node; - expr_retain(param_node); + + /* swap the refresh function */ lnode->refresh_param_values = refresh_dense_right; } + else + { + double *AT = (double *) SP_MALLOC(n * m * sizeof(double)); + A_transpose(AT, data, m, n); + left_matmul_node = new_left_matmul_dense(NULL, u_transpose, n, m, AT); + free(AT); + } - expr *node = new_transpose(left_matmul_node); - - free(AT); - return node; + return new_transpose(left_matmul_node); } diff --git a/src/utils/dense_matrix.c b/src/utils/dense_matrix.c index a521f15..5c628c2 100644 --- a/src/utils/dense_matrix.c +++ b/src/utils/dense_matrix.c @@ -67,7 +67,10 @@ Matrix *new_dense_matrix(int m, int n, const double *data) dm->base.update_values = dense_update_values; dm->base.free_fn = dense_free; dm->x = (double *) SP_MALLOC(m * n * sizeof(double)); - memcpy(dm->x, data, m * n * sizeof(double)); + if (data != NULL) + { + memcpy(dm->x, data, m * n * sizeof(double)); + } dm->work = (double *) SP_MALLOC(n * sizeof(double)); return &dm->base; } diff --git a/tests/problem/test_param_broadcast.h b/tests/problem/test_param_broadcast.h index 53e4456..377121a 100644 --- a/tests/problem/test_param_broadcast.h +++ b/tests/problem/test_param_broadcast.h @@ -269,9 +269,7 @@ const char *test_const_hstack_left_matmul(void) expr *p2 = new_parameter(1, n, PARAM_FIXED, 2 * n, p2_vals); expr *param_nodes[2] = {p1, p2}; expr *p_hstack = new_hstack(param_nodes, 2, 2 * n); - /* pass concatenated parameter vectors */ - double A_data[8] = {1.0, 2.0, 3.0, 0.0, 4.0, 0.0, 5.0, 6.0}; - expr *objective = new_left_matmul_dense(p_hstack, x, 1, 2 * n, A_data); + expr *objective = new_left_matmul_dense(p_hstack, x, 1, 2 * n, NULL); problem *prob = new_problem(objective, NULL, 0, false); problem_init_derivatives(prob); @@ -304,9 +302,7 @@ const char *test_param_hstack_left_matmul(void) expr *p2 = new_parameter(1, n, n, 2 * n, p2_vals); expr *param_nodes[2] = {p1, p2}; expr *p_hstack = new_hstack(param_nodes, 2, 2 * n); - /* pass concatenated parameter vectors */ - double A_data[8] = {1.0, 2.0, 3.0, 0.0, 4.0, 0.0, 5.0, 6.0}; - expr *objective = new_left_matmul_dense(p_hstack, x, 1, 2 * n, A_data); + expr *objective = new_left_matmul_dense(p_hstack, x, 1, 2 * n, NULL); problem *prob = new_problem(objective, NULL, 0, false); problem_register_params(prob, param_nodes, 2); diff --git a/tests/problem/test_param_prob.h b/tests/problem/test_param_prob.h index 02c1a67..a638b31 100644 --- a/tests/problem/test_param_prob.h +++ b/tests/problem/test_param_prob.h @@ -167,12 +167,11 @@ const char *test_param_left_matmul_problem(void) /* minimize sum(x) subject to Ax = ?, with A parameter */ expr *x = new_variable(2, 1, 0, n); expr *objective = new_sum(x, -1); - double theta[4] = {1.0, 0.0, 0.0, 1.0}; + /* column-major of A = [[2,4],[0,1]] */ + double theta[4] = {2.0, 0.0, 4.0, 1.0}; expr *A_param = new_parameter(2, 2, 0, n, theta); - /* dense 2x2 matrix */ - double Ax[4] = {2.0, 0.0, 0.0, 1.0}; - expr *constraint = new_left_matmul_dense(A_param, x, 2, 2, Ax); + expr *constraint = new_left_matmul_dense(A_param, x, 2, 2, NULL); expr *constraints[1] = {constraint}; problem *prob = new_problem(objective, constraints, 1, false); @@ -188,7 +187,8 @@ const char *test_param_left_matmul_problem(void) /* test 1: initial jacobian */ problem_constraint_forward(prob, x_vals); - double constrs[2] = {2.0, 2.0}; + double constrs[2] = {10.0, 2.0}; + double Ax[4] = {2.0, 4.0, 0.0, 1.0}; problem_jacobian(prob); mu_assert("vals fail", cmp_double_array(prob->constraint_values, constrs, 2)); mu_assert("vals fail", cmp_double_array(prob->jacobian->x, Ax, 4)); @@ -227,12 +227,11 @@ const char *test_param_right_matmul_problem(void) /* minimize sum(x) subject to xA = ?, with A parameter */ expr *x = new_variable(1, 2, 0, n); expr *objective = new_sum(x, -1); - double theta[4] = {1.0, 0.0, 0.0, 1.0}; + /* column-major of A = [[2,0],[0,1]] */ + double theta[4] = {2.0, 0.0, 0.0, 1.0}; expr *A_param = new_parameter(2, 2, 0, n, theta); - /* dense 2x2 matrix */ - double Ax[4] = {2.0, 0.0, 0.0, 1.0}; - expr *constraint = new_right_matmul_dense(A_param, x, 2, 2, Ax); + expr *constraint = new_right_matmul_dense(A_param, x, 2, 2, NULL); expr *constraints[1] = {constraint}; problem *prob = new_problem(objective, constraints, 1, false); @@ -249,6 +248,7 @@ const char *test_param_right_matmul_problem(void) /* test 1: initial jacobian */ problem_constraint_forward(prob, x_vals); double constrs[2] = {2.0, 2.0}; + double Ax[4] = {2.0, 0.0, 0.0, 1.0}; problem_jacobian(prob); mu_assert("vals fail", cmp_double_array(prob->constraint_values, constrs, 2)); mu_assert("vals fail", cmp_double_array(prob->jacobian->x, Ax, 4)); @@ -343,12 +343,11 @@ const char *test_param_left_matmul_rectangular(void) /* minimize sum(x) subject to Ax = ?, with A parameter (3x2) */ expr *x = new_variable(2, 1, 0, n); expr *objective = new_sum(x, -1); - double theta[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; + /* column-major of A = [[1,2],[3,4],[5,6]] */ + double theta[6] = {1.0, 3.0, 5.0, 2.0, 4.0, 6.0}; expr *A_param = new_parameter(3, 2, 0, n, theta); - /* dense 3x2 matrix */ - double Ax[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; - expr *constraint = new_left_matmul_dense(A_param, x, 3, 2, Ax); + expr *constraint = new_left_matmul_dense(A_param, x, 3, 2, NULL); expr *constraints[1] = {constraint}; problem *prob = new_problem(objective, constraints, 1, false); @@ -365,6 +364,7 @@ const char *test_param_left_matmul_rectangular(void) /* test 1: initial jacobian */ problem_constraint_forward(prob, x_vals); double constrs[3] = {5.0, 11.0, 17.0}; + double Ax[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; problem_jacobian(prob); mu_assert("vals fail", cmp_double_array(prob->constraint_values, constrs, 3)); mu_assert("vals fail", cmp_double_array(prob->jacobian->x, Ax, 6)); @@ -408,12 +408,11 @@ const char *test_param_right_matmul_rectangular(void) /* minimize sum(x) subject to xA = ?, with A parameter (2x3) */ expr *x = new_variable(1, 2, 0, n); expr *objective = new_sum(x, -1); - double theta[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; + /* column-major of A = [[1,2,3],[4,5,6]] */ + double theta[6] = {1.0, 4.0, 2.0, 5.0, 3.0, 6.0}; expr *A_param = new_parameter(2, 3, 0, n, theta); - /* dense 2x3 matrix */ - double Ax[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; - expr *constraint = new_right_matmul_dense(A_param, x, 2, 3, Ax); + expr *constraint = new_right_matmul_dense(A_param, x, 2, 3, NULL); expr *constraints[1] = {constraint}; problem *prob = new_problem(objective, constraints, 1, false); @@ -478,11 +477,9 @@ const char *test_param_shared_left_matmul_problem(void) double theta[4] = {1.0, 0.0, 0.0, 1.0}; expr *A_param = new_parameter(2, 2, 0, n, theta); - /* dense 2x2 identity */ - double Ax[4] = {1.0, 0.0, 0.0, 1.0}; expr *constraints[2]; - constraints[0] = new_left_matmul_dense(A_param, x, 2, 2, Ax); - constraints[1] = new_left_matmul_dense(A_param, y, 2, 2, Ax); + constraints[0] = new_left_matmul_dense(A_param, x, 2, 2, NULL); + constraints[1] = new_left_matmul_dense(A_param, y, 2, 2, NULL); problem *prob = new_problem(objective, constraints, 2, false); /* register parameters and fill sparsity patterns */ From 82adda44534e3fa23b4c87fe2be526e8f117e116 Mon Sep 17 00:00:00 2001 From: dance858 Date: Thu, 23 Apr 2026 09:48:47 -0700 Subject: [PATCH 5/7] get rid of sparse matrix abstraction and just do CSR instead --- include/subexpr.h | 2 +- include/utils/mini_numpy.h | 11 +++++ src/atoms/affine/convolve.c | 76 +++++++--------------------------- src/atoms/affine/left_matmul.c | 2 +- src/utils/mini_numpy.c | 28 +++++++++++++ 5 files changed, 57 insertions(+), 62 deletions(-) diff --git a/include/subexpr.h b/include/subexpr.h index 46bbd57..b129f2e 100644 --- a/include/subexpr.h +++ b/include/subexpr.h @@ -159,7 +159,7 @@ typedef struct convolve_expr expr *param_source; /* length-m kernel */ int m; /* kernel length */ int n; /* input length */ - Matrix *T; /* (m+n-1) x n Toeplitz Sparse_Matrix */ + CSR_Matrix *T; /* (m+n-1) x n convolution matrix */ CSC_Matrix *Jchild_CSC; CSC_Matrix *J_CSC; int *csc_to_csr_work; diff --git a/include/utils/mini_numpy.h b/include/utils/mini_numpy.h index 1011de3..c87f80a 100644 --- a/include/utils/mini_numpy.h +++ b/include/utils/mini_numpy.h @@ -18,6 +18,8 @@ #ifndef MINI_NUMPY_H #define MINI_NUMPY_H +#include "utils/CSR_Matrix.h" + /* Example: a = [1, 2], len = 2, repeats = 3, result = [1, 1, 1, 2, 2, 2] */ void repeat(double *result, const double *a, int len, int repeats); @@ -41,4 +43,13 @@ void Y_kron_I_vec(int m, int k, int n, const double *Y, const double *w, double compute v = vec(X^T @ W). */ void I_kron_XT_vec(int m, int k, int n, const double *X, const double *w, double *v); +/* Fill T_csr's row pointers and column indices for the 1D full-convolution + Toeplitz matrix T(a), sized (m+n-1) x n with m*n nonzeros. Values (x) are + not written; call conv_matrix_fill_values to populate them. */ +void conv_matrix_fill_sparsity(CSR_Matrix *T_csr, int m, int n); + +/* Overwrite T_csr->x from kernel a, using the sparsity already written by + conv_matrix_fill_sparsity. T[r, col] = a[r - col]. */ +void conv_matrix_fill_values(CSR_Matrix *T_csr, const double *a); + #endif /* MINI_NUMPY_H */ diff --git a/src/atoms/affine/convolve.c b/src/atoms/affine/convolve.c index 51b80dc..ab0fbaf 100644 --- a/src/atoms/affine/convolve.c +++ b/src/atoms/affine/convolve.c @@ -18,7 +18,8 @@ #include "atoms/affine.h" #include "subexpr.h" #include "utils/CSR_Matrix.h" -#include "utils/matrix.h" +#include "utils/linalg_sparse_matmuls.h" +#include "utils/mini_numpy.h" #include "utils/tracked_alloc.h" #include #include @@ -37,45 +38,6 @@ * from the new kernel before running the convolution. The sparsity of * T(a) is kernel-independent (staircase), so only values are rewritten. */ -/* Fill T(a)'s CSR row pointers, column indices, and values from kernel a. - Row r holds T[r, col] = a[r - col] for col in - [max(0, r-m+1), min(n-1, r)]. */ -static void build_toeplitz(CSR_Matrix *T_csr, const double *a, int m, int n) -{ - int out_rows = m + n - 1; - int nnz = 0; - for (int r = 0; r < out_rows; r++) - { - T_csr->p[r] = nnz; - int col_lo = r - m + 1 > 0 ? r - m + 1 : 0; - int col_hi = r < n - 1 ? r : n - 1; - for (int col = col_lo; col <= col_hi; col++) - { - T_csr->i[nnz] = col; - T_csr->x[nnz] = a[r - col]; - nnz++; - } - } - T_csr->p[out_rows] = nnz; -} - -/* Values-only refresh analogous to refresh_dense_left in left_matmul.c: - sparsity is kernel-independent so p/i stay intact; we just walk the - existing CSR entries and overwrite x from the current kernel. */ -static void refresh_toeplitz_values(convolve_expr *cnode) -{ - CSR_Matrix *T_csr = ((Sparse_Matrix *) cnode->T)->csr; - const double *a = cnode->param_source->value; - int out_rows = cnode->m + cnode->n - 1; - for (int r = 0; r < out_rows; r++) - { - for (int k = T_csr->p[r]; k < T_csr->p[r + 1]; k++) - { - T_csr->x[k] = a[r - T_csr->i[k]]; - } - } -} - static void forward(expr *node, const double *u) { expr *child = node->left; @@ -84,12 +46,12 @@ static void forward(expr *node, const double *u) if (cnode->base.needs_parameter_refresh) { cnode->param_source->forward(cnode->param_source, NULL); - /* T is NULL until jacobian_init_impl runs; forward can be called - first in standalone tests, in which case T is built later from - the already-refreshed param values. */ + /* refresh the convolution matrix values if it exists (necessary to check + for null in case someone calls forward before initializing the jacobian, + which might happen when we expose Python bindings to SparseDiffEngine) */ if (cnode->T != NULL) { - refresh_toeplitz_values(cnode); + conv_matrix_fill_values(cnode->T, cnode->param_source->value); } cnode->base.needs_parameter_refresh = false; } @@ -99,13 +61,11 @@ static void forward(expr *node, const double *u) const double *a = cnode->param_source->value; const double *x = child->value; double *y = node->value; - int m = cnode->m; - int n = cnode->n; memset(y, 0, node->size * sizeof(double)); - for (int j = 0; j < n; j++) + for (int j = 0; j < cnode->n; j++) { - for (int i = 0; i < m; i++) + for (int i = 0; i < cnode->m; i++) { y[i + j] += a[i] * x[j]; } @@ -122,19 +82,15 @@ static void jacobian_init_impl(expr *node) jacobian_init(child); - /* Build T(a) as CSR: (m+n-1) x n, m*n nonzeros, staircase pattern. */ - int out_rows = m + n - 1; - CSR_Matrix *T_csr = new_csr_matrix(out_rows, n, m * n); - build_toeplitz(T_csr, a, m, n); - - cnode->T = new_sparse_matrix(T_csr); - free_csr_matrix(T_csr); + /* Build convolution matrix of size (m+n-1) x n with m*n nonzeros */ + cnode->T = new_csr_matrix(m + n - 1, n, m * n); + conv_matrix_fill_sparsity(cnode->T, m, n); + conv_matrix_fill_values(cnode->T, a); - /* Reuse left_matmul's sparsity precompute: J_node = T @ J_child via - block_left_mult with n_blocks = 1. */ + /* J_node = T @ J_child via block_left_mult with n_blocks = 1. */ cnode->Jchild_CSC = csr_to_csc_alloc(child->jacobian, node->work->iwork); cnode->J_CSC = - cnode->T->block_left_mult_sparsity(cnode->T, cnode->Jchild_CSC, 1); + block_left_multiply_fill_sparsity(cnode->T, cnode->Jchild_CSC, 1); node->jacobian = csc_to_csr_alloc(cnode->J_CSC, cnode->csc_to_csr_work); } @@ -148,7 +104,7 @@ static void eval_jacobian(expr *node) /* T values have been refreshed by forward() if the kernel changed; here we just need to refresh child's Jacobian and rerun the block matmul. */ csr_to_csc_fill_values(child->jacobian, cnode->Jchild_CSC, node->work->iwork); - cnode->T->block_left_mult_values(cnode->T, cnode->Jchild_CSC, cnode->J_CSC); + block_left_multiply_fill_values(cnode->T, cnode->Jchild_CSC, cnode->J_CSC); csc_to_csr_fill_values(cnode->J_CSC, node->jacobian, cnode->csc_to_csr_work); } @@ -196,7 +152,7 @@ static bool is_affine(const expr *node) static void free_type_data(expr *node) { convolve_expr *cnode = (convolve_expr *) node; - free_matrix(cnode->T); + free_csr_matrix(cnode->T); free_csc_matrix(cnode->Jchild_CSC); free_csc_matrix(cnode->J_CSC); free(cnode->csc_to_csr_work); diff --git a/src/atoms/affine/left_matmul.c b/src/atoms/affine/left_matmul.c index d5d87a1..c083385 100644 --- a/src/atoms/affine/left_matmul.c +++ b/src/atoms/affine/left_matmul.c @@ -294,7 +294,7 @@ expr *new_left_matmul_dense(expr *param_node, expr *u, int m, int n, lnode->AT = new_dense_matrix(n, m, NULL); node->needs_parameter_refresh = true; } - /* constant matrix and param_source should be null */ + /* constant matrix case */ else { if (data == NULL) diff --git a/src/utils/mini_numpy.c b/src/utils/mini_numpy.c index aa5f862..baa00c5 100644 --- a/src/utils/mini_numpy.c +++ b/src/utils/mini_numpy.c @@ -100,3 +100,31 @@ void I_kron_XT_vec(int m, int k, int n, const double *X, const double *w, double } } } + +void conv_matrix_fill_sparsity(CSR_Matrix *T_csr, int m, int n) +{ + int nnz = 0; + for (int r = 0; r < T_csr->m; r++) + { + T_csr->p[r] = nnz; + int col_lo = r - m + 1 > 0 ? r - m + 1 : 0; + int col_hi = r < n - 1 ? r : n - 1; + for (int col = col_lo; col <= col_hi; col++) + { + T_csr->i[nnz] = col; + nnz++; + } + } + T_csr->p[T_csr->m] = nnz; +} + +void conv_matrix_fill_values(CSR_Matrix *T_csr, const double *a) +{ + for (int r = 0; r < T_csr->m; r++) + { + for (int k = T_csr->p[r]; k < T_csr->p[r + 1]; k++) + { + T_csr->x[k] = a[r - T_csr->i[k]]; + } + } +} From 945ae93868266e6dae839847412bef904717273a Mon Sep 17 00:00:00 2001 From: dance858 Date: Thu, 23 Apr 2026 09:54:18 -0700 Subject: [PATCH 6/7] get rid of block abstraction and use non-block logic instead --- include/subexpr.h | 2 -- src/atoms/affine/convolve.c | 15 ++++----------- 2 files changed, 4 insertions(+), 13 deletions(-) diff --git a/include/subexpr.h b/include/subexpr.h index b129f2e..f97feef 100644 --- a/include/subexpr.h +++ b/include/subexpr.h @@ -161,8 +161,6 @@ typedef struct convolve_expr int n; /* input length */ CSR_Matrix *T; /* (m+n-1) x n convolution matrix */ CSC_Matrix *Jchild_CSC; - CSC_Matrix *J_CSC; - int *csc_to_csr_work; } convolve_expr; /* Bivariate matrix multiplication: Z = f(u) @ g(u) where both children diff --git a/src/atoms/affine/convolve.c b/src/atoms/affine/convolve.c index ab0fbaf..43877aa 100644 --- a/src/atoms/affine/convolve.c +++ b/src/atoms/affine/convolve.c @@ -87,11 +87,9 @@ static void jacobian_init_impl(expr *node) conv_matrix_fill_sparsity(cnode->T, m, n); conv_matrix_fill_values(cnode->T, a); - /* J_node = T @ J_child via block_left_mult with n_blocks = 1. */ + /* J = T @ J_child */ cnode->Jchild_CSC = csr_to_csc_alloc(child->jacobian, node->work->iwork); - cnode->J_CSC = - block_left_multiply_fill_sparsity(cnode->T, cnode->Jchild_CSC, 1); - node->jacobian = csc_to_csr_alloc(cnode->J_CSC, cnode->csc_to_csr_work); + node->jacobian = csr_csc_matmul_alloc(cnode->T, cnode->Jchild_CSC); } static void eval_jacobian(expr *node) @@ -101,11 +99,9 @@ static void eval_jacobian(expr *node) child->eval_jacobian(child); - /* T values have been refreshed by forward() if the kernel changed; here - we just need to refresh child's Jacobian and rerun the block matmul. */ + /* J = T @ J_child */ csr_to_csc_fill_values(child->jacobian, cnode->Jchild_CSC, node->work->iwork); - block_left_multiply_fill_values(cnode->T, cnode->Jchild_CSC, cnode->J_CSC); - csc_to_csr_fill_values(cnode->J_CSC, node->jacobian, cnode->csc_to_csr_work); + csr_csc_matmul_fill_values(cnode->T, cnode->Jchild_CSC, node->jacobian); } static void wsum_hess_init_impl(expr *node) @@ -154,8 +150,6 @@ static void free_type_data(expr *node) convolve_expr *cnode = (convolve_expr *) node; free_csr_matrix(cnode->T); free_csc_matrix(cnode->Jchild_CSC); - free_csc_matrix(cnode->J_CSC); - free(cnode->csc_to_csr_work); free_expr(cnode->param_source); } @@ -207,7 +201,6 @@ expr *new_convolve(expr *param_node, expr *child) /* iwork is used for csr_to_csc of child's Jacobian (size n_vars). */ node->work->iwork = (int *) SP_MALLOC(node->n_vars * sizeof(int)); - cnode->csc_to_csr_work = (int *) SP_MALLOC(node->size * sizeof(int)); /* Ensure first forward() pulls current param values through any broadcast/promote wrappers and reflects them in T (once T is built). */ From 806d0947d170c044b1cc2e4bbd84fcf709ef6806 Mon Sep 17 00:00:00 2001 From: dance858 Date: Thu, 23 Apr 2026 10:02:23 -0700 Subject: [PATCH 7/7] happy with convolve.c --- src/atoms/affine/convolve.c | 38 +++++++++++-------------------------- 1 file changed, 11 insertions(+), 27 deletions(-) diff --git a/src/atoms/affine/convolve.c b/src/atoms/affine/convolve.c index 43877aa..55e5857 100644 --- a/src/atoms/affine/convolve.c +++ b/src/atoms/affine/convolve.c @@ -28,15 +28,8 @@ /* 1D full convolution: y = conv(a, child), where a is a length-m kernel * held by param_source and child is a length-n vector. Output length is * m + n - 1. In matrix form, y = T(a) @ child, where T(a) is the - * (m+n-1) x n Toeplitz matrix with T(a)[k, j] = a[k-j] when 0 <= k-j < m. - * - * Forward and Hessian backprop (w' = T(a)^T w) are direct loops, avoiding - * CSR traversal and AT materialization. Jacobian values go through the - * existing block-left-mult path so composite children work correctly. - * - * When param_source is updatable, forward() refreshes T(a)'s CSR values - * from the new kernel before running the convolution. The sparsity of - * T(a) is kernel-independent (staircase), so only values are rewritten. */ + * (m+n-1) x n Toeplitz/convolution matrix with T(a)[k, j] = a[k-j] when + * 0 <= k-j < m. */ static void forward(expr *node, const double *u) { @@ -118,17 +111,14 @@ static void eval_wsum_hess(expr *node, const double *w) { expr *child = node->left; convolve_expr *cnode = (convolve_expr *) node; - int m = cnode->m; - int n = cnode->n; const double *a = cnode->param_source->value; double *w_prime = node->work->dwork; - /* w' = T(a)^T w. T(a)^T[j, k] = a[k-j] for j <= k < j+m, so - w'[j] = sum_{i=0..m-1} a[i] * w[i + j]. */ - for (int j = 0; j < n; j++) + /* w' = conv_matrix.T w, so w'[j] = sum_{i=0..m-1} a[i] * w[i + j]. */ + for (int j = 0; j < cnode->n; j++) { double sum = 0.0; - for (int i = 0; i < m; i++) + for (int i = 0; i < cnode->m; i++) { sum += a[i] * w[i + j]; } @@ -160,18 +150,18 @@ expr *new_convolve(expr *param_node, expr *child) is shape-agnostic (flat buffers of size m + n - 1). Output shape matches the input orientation. */ int m = param_node->size; - int n, out_d1, out_d2; + int n, d1, d2; if (child->d2 == 1) { n = child->d1; - out_d1 = m + n - 1; - out_d2 = 1; + d1 = m + n - 1; + d2 = 1; } else if (child->d1 == 1) { n = child->d2; - out_d1 = 1; - out_d2 = m + n - 1; + d1 = 1; + d2 = m + n - 1; } else { @@ -180,15 +170,9 @@ expr *new_convolve(expr *param_node, expr *child) exit(1); } - if (m < 1 || n < 1) - { - fprintf(stderr, "Error in new_convolve: m and n must be >= 1\n"); - exit(1); - } - convolve_expr *cnode = (convolve_expr *) SP_CALLOC(1, sizeof(convolve_expr)); expr *node = &cnode->base; - init_expr(node, out_d1, out_d2, child->n_vars, forward, jacobian_init_impl, + init_expr(node, d1, d2, child->n_vars, forward, jacobian_init_impl, eval_jacobian, is_affine, wsum_hess_init_impl, eval_wsum_hess, free_type_data); node->left = child;