SparseDifferentiation · dance858 · Apr 23, 2026 · Apr 22, 2026 · Apr 22, 2026 · Apr 22, 2026
diff --git a/include/atoms/affine.h b/include/atoms/affine.h
@@ -42,19 +42,31 @@ expr *new_diag_mat(expr *child);
 expr *new_upper_tri(expr *child);
 expr *new_transpose(expr *child);
 
-/* Left matrix multiplication: A @ f(x) where A is a constant or parameter
- * sparse matrix. param_node is NULL for fixed constants. */
+/* Left matrix multiplication: A @ f(x) where A is a constant sparse matrix.
+   param_node is NULL for fixed constants. We currently do not support sparse
+   parameters, so param_node should always be null. */
 expr *new_left_matmul(expr *param_node, expr *u, const CSR_Matrix *A);
 
-/* Left matrix multiplication: A @ f(x) where A is a constant or parameter
- * dense matrix (row-major, m x n). Uses CBLAS for efficient computation. */
+/* Left matrix multiplication: A @ f(x) where A is a constant dense matrix
+   (in row-major, m x n, with values given by 'data') or a parameter
+   representing a dense m x n matrix in row-major.
+
+   The 'data' pointer only represents the values of the dense matrix if
+   param_node is null. If param_node is not null, data must be null. */
 expr *new_left_matmul_dense(expr *param_node, expr *u, int m, int n,
                             const double *data);
 
-/* Right matrix multiplication: f(x) @ A where A is a constant or parameter
- * matrix. */
+/* Right matrix multiplication: f(x) @ A where A is a constant sparse matrix.
+   We currently do not support sparse parameters, so param_node should always be
+   null. */
 expr *new_right_matmul(expr *param_node, expr *u, const CSR_Matrix *A);
 
+/* Right matrix multiplication: f(x) @ A where A is a constant dense matrix
+   (in row-major, m x n, with values given by 'data') or a parameter
+   representing a dense m x n matrix in row-major.
+
+   The 'data' pointer only represents the values of the dense matrix if
+   param_node is null. If param_node is not null, data must be null. */
 expr *new_right_matmul_dense(expr *param_node, expr *u, int m, int n,
                              const double *data);
 
@@ -65,4 +77,8 @@ expr *new_scalar_mult(expr *param_node, expr *child);
  * param_node */
 expr *new_vector_mult(expr *param_node, expr *child);
 
+/* 1D full convolution: y = conv(param_node, child) where param_node is the
+   kernel and may either represent a constant or an updatable parameter */
+expr *new_convolve(expr *param_node, expr *child);
+
 #endif /* AFFINE_H */
diff --git a/include/subexpr.h b/include/subexpr.h
@@ -148,6 +148,21 @@ typedef struct vector_mult_expr
     expr *param_source;
 } vector_mult_expr;
 
+/* 1D convolution: y = conv(a, child) where a is a length-m kernel held by
+ * param_source. Output has size (m + n - 1) where n is the child length.
+ * Forward and wsum_hess backprop are computed as direct loops; for Jacobian
+ * we materialize T(a) as a CSR once at jacobian_init and reuse the engine's
+ * block-left-mult machinery for composite children. */
+typedef struct convolve_expr
+{
+    expr base;
+    expr *param_source; /* length-m kernel */
+    int m;              /* kernel length */
+    int n;              /* input length */
+    CSR_Matrix *T;      /* (m+n-1) x n convolution matrix */
+    CSC_Matrix *Jchild_CSC;
+} convolve_expr;
+
 /* Bivariate matrix multiplication: Z = f(u) @ g(u) where both children
  * may be composite expressions. */
 typedef struct matmul_expr

diff --git a/include/utils/dense_matrix.h b/include/utils/dense_matrix.h
@@ -28,7 +28,8 @@ typedef struct Dense_Matrix
     double *work; /* scratch buffer, length n */
 } Dense_Matrix;
 
-/* Constructors */
+/* Constructors. If data is NULL, the value buffer is allocated but left
+   uninitialized; otherwise m*n entries are copied from data. */
 Matrix *new_dense_matrix(int m, int n, const double *data);
 
 /* Transpose helper */

diff --git a/include/utils/mini_numpy.h b/include/utils/mini_numpy.h
@@ -18,6 +18,8 @@
 #ifndef MINI_NUMPY_H
 #define MINI_NUMPY_H
 
+#include "utils/CSR_Matrix.h"
+
 /* Example: a = [1, 2], len = 2, repeats = 3, result = [1, 1, 1, 2, 2, 2] */
 void repeat(double *result, const double *a, int len, int repeats);
 
@@ -41,4 +43,13 @@ void Y_kron_I_vec(int m, int k, int n, const double *Y, const double *w, double
    compute v = vec(X^T @ W). */
 void I_kron_XT_vec(int m, int k, int n, const double *X, const double *w, double *v);
 
+/* Fill T_csr's row pointers and column indices for the 1D full-convolution
+   Toeplitz matrix T(a), sized (m+n-1) x n with m*n nonzeros. Values (x) are
+   not written; call conv_matrix_fill_values to populate them. */
+void conv_matrix_fill_sparsity(CSR_Matrix *T_csr, int m, int n);
+
+/* Overwrite T_csr->x from kernel a, using the sparsity already written by
+   conv_matrix_fill_sparsity. T[r, col] = a[r - col]. */
+void conv_matrix_fill_values(CSR_Matrix *T_csr, const double *a);
+
 #endif /* MINI_NUMPY_H */
diff --git a/src/atoms/affine/convolve.c b/src/atoms/affine/convolve.c
@@ -0,0 +1,194 @@
+/*
+ * Copyright 2026 Daniel Cederberg and William Zhang
+ *
+ * This file is part of the SparseDiffEngine project.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "atoms/affine.h"
+#include "subexpr.h"
+#include "utils/CSR_Matrix.h"
+#include "utils/linalg_sparse_matmuls.h"
+#include "utils/mini_numpy.h"
+#include "utils/tracked_alloc.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* 1D full convolution: y = conv(a, child), where a is a length-m kernel
+ * held by param_source and child is a length-n vector. Output length is
+ * m + n - 1. In matrix form, y = T(a) @ child, where T(a) is the
+ * (m+n-1) x n Toeplitz/convolution matrix with T(a)[k, j] = a[k-j] when
+ * 0 <= k-j < m. */
+
+static void forward(expr *node, const double *u)
+{
+    expr *child = node->left;
+    convolve_expr *cnode = (convolve_expr *) node;
+
+    if (cnode->base.needs_parameter_refresh)
+    {
+        cnode->param_source->forward(cnode->param_source, NULL);
+        /* refresh the convolution matrix values if it exists (necessary to check
+           for null in case someone calls forward before initializing the jacobian,
+           which might happen when we expose Python bindings to SparseDiffEngine) */
+        if (cnode->T != NULL)
+        {
+            conv_matrix_fill_values(cnode->T, cnode->param_source->value);
+        }
+        cnode->base.needs_parameter_refresh = false;
+    }
+
+    child->forward(child, u);
+
+    const double *a = cnode->param_source->value;
+    const double *x = child->value;
+    double *y = node->value;
+
+    memset(y, 0, node->size * sizeof(double));
+    for (int j = 0; j < cnode->n; j++)
+    {
+        for (int i = 0; i < cnode->m; i++)
+        {
+            y[i + j] += a[i] * x[j];
+        }
+    }
+}
+
+static void jacobian_init_impl(expr *node)
+{
+    expr *child = node->left;
+    convolve_expr *cnode = (convolve_expr *) node;
+    int m = cnode->m;
+    int n = cnode->n;
+    const double *a = cnode->param_source->value;
+
+    jacobian_init(child);
+
+    /* Build convolution matrix of size (m+n-1) x n with m*n nonzeros */
+    cnode->T = new_csr_matrix(m + n - 1, n, m * n);
+    conv_matrix_fill_sparsity(cnode->T, m, n);
+    conv_matrix_fill_values(cnode->T, a);
+
+    /* J = T @ J_child */
+    cnode->Jchild_CSC = csr_to_csc_alloc(child->jacobian, node->work->iwork);
+    node->jacobian = csr_csc_matmul_alloc(cnode->T, cnode->Jchild_CSC);
+}
+
+static void eval_jacobian(expr *node)
+{
+    expr *child = node->left;
+    convolve_expr *cnode = (convolve_expr *) node;
+
+    child->eval_jacobian(child);
+
+    /* J = T @ J_child */
+    csr_to_csc_fill_values(child->jacobian, cnode->Jchild_CSC, node->work->iwork);
+    csr_csc_matmul_fill_values(cnode->T, cnode->Jchild_CSC, node->jacobian);
+}
+
+static void wsum_hess_init_impl(expr *node)
+{
+    expr *child = node->left;
+    convolve_expr *cnode = (convolve_expr *) node;
+
+    wsum_hess_init(child);
+    node->wsum_hess = new_csr_copy_sparsity(child->wsum_hess);
+    node->work->dwork = (double *) SP_MALLOC(cnode->n * sizeof(double));
+}
+
+static void eval_wsum_hess(expr *node, const double *w)
+{
+    expr *child = node->left;
+    convolve_expr *cnode = (convolve_expr *) node;
+    const double *a = cnode->param_source->value;
+    double *w_prime = node->work->dwork;
+
+    /* w' = conv_matrix.T w, so w'[j] = sum_{i=0..m-1} a[i] * w[i + j]. */
+    for (int j = 0; j < cnode->n; j++)
+    {
+        double sum = 0.0;
+        for (int i = 0; i < cnode->m; i++)
+        {
+            sum += a[i] * w[i + j];
+        }
+        w_prime[j] = sum;
+    }
+
+    child->eval_wsum_hess(child, w_prime);
+    memcpy(node->wsum_hess->x, child->wsum_hess->x,
+           child->wsum_hess->nnz * sizeof(double));
+}
+
+static bool is_affine(const expr *node)
+{
+    return node->left->is_affine(node->left);
+}
+
+static void free_type_data(expr *node)
+{
+    convolve_expr *cnode = (convolve_expr *) node;
+    free_csr_matrix(cnode->T);
+    free_csc_matrix(cnode->Jchild_CSC);
+    free_expr(cnode->param_source);
+}
+
+expr *new_convolve(expr *param_node, expr *child)
+{
+    /* Accept both (n, 1) column and (1, n) row shapes — numpy-style 1D
+       arrays reach us as (1, n) through the Python bindings, and the math
+       is shape-agnostic (flat buffers of size m + n - 1). Output shape
+       matches the input orientation. */
+    int m = param_node->size;
+    int n, d1, d2;
+    if (child->d2 == 1)
+    {
+        n = child->d1;
+        d1 = m + n - 1;
+        d2 = 1;
+    }
+    else if (child->d1 == 1)
+    {
+        n = child->d2;
+        d1 = 1;
+        d2 = m + n - 1;
+    }
+    else
+    {
+        fprintf(stderr, "Error in new_convolve: child must be a vector "
+                        "(shape (n, 1) or (1, n))\n");
+        exit(1);
+    }
+
+    convolve_expr *cnode = (convolve_expr *) SP_CALLOC(1, sizeof(convolve_expr));
+    expr *node = &cnode->base;
+    init_expr(node, d1, d2, child->n_vars, forward, jacobian_init_impl,
+              eval_jacobian, is_affine, wsum_hess_init_impl, eval_wsum_hess,
+              free_type_data);
+    node->left = child;
+    expr_retain(child);
+
+    cnode->param_source = param_node;
+    expr_retain(param_node);
+    cnode->m = m;
+    cnode->n = n;
+
+    /* iwork is used for csr_to_csc of child's Jacobian (size n_vars). */
+    node->work->iwork = (int *) SP_MALLOC(node->n_vars * sizeof(int));
+
+    /* Ensure first forward() pulls current param values through any
+       broadcast/promote wrappers and reflects them in T (once T is built). */
+    cnode->base.needs_parameter_refresh = true;
+
+    return node;
+}
diff --git a/src/atoms/affine/left_matmul.c b/src/atoms/affine/left_matmul.c
@@ -276,15 +276,35 @@ expr *new_left_matmul_dense(expr *param_node, expr *u, int m, int n,
     lnode->csc_to_csr_work = (int *) SP_MALLOC(node->size * sizeof(int));
     lnode->n_blocks = n_blocks;
 
-    lnode->A = new_dense_matrix(m, n, data);
-    lnode->AT = dense_matrix_trans((const Dense_Matrix *) lnode->A);
-
     /* parameter support */
     lnode->param_source = param_node;
     if (param_node != NULL)
     {
+        if (data != NULL)
+        {
+            fprintf(stderr, "Error in new_left_matmul_dense with ptr convention \n");
+            exit(1);
+        }
+
         expr_retain(param_node);
         lnode->refresh_param_values = refresh_dense_left;
+
+        /* A and AT buffers are filled by refresh_dense_left from the parameter. */
+        lnode->A = new_dense_matrix(m, n, NULL);
+        lnode->AT = new_dense_matrix(n, m, NULL);
+        node->needs_parameter_refresh = true;
+    }
+    /* constant matrix case */
+    else
+    {
+        if (data == NULL)
+        {
+            fprintf(stderr, "Error in new_left_matmul_dense with ptr convention \n");
+            exit(1);
+        }
+
+        lnode->A = new_dense_matrix(m, n, data);
+        lnode->AT = dense_matrix_trans((const Dense_Matrix *) lnode->A);
     }
 
     return node;

diff --git a/src/atoms/affine/right_matmul.c b/src/atoms/affine/right_matmul.c
@@ -82,24 +82,32 @@ expr *new_right_matmul(expr *param_node, expr *u, const CSR_Matrix *A)
 expr *new_right_matmul_dense(expr *param_node, expr *u, int m, int n,
                              const double *data)
 {
-    /* We express: u @ A = (A^T @ u^T)^T. A is m x n, so A^T is n x m. */
-    double *AT = (double *) SP_MALLOC(n * m * sizeof(double));
-    A_transpose(AT, data, m, n);
+    if ((param_node != NULL && data != NULL) || (param_node == NULL && data == NULL))
+    {
+        fprintf(stderr, "Error in new_right_matmul_dense with ptr convention \n");
+        exit(1);
+    }
 
+    /* We express: u @ A = (A^T @ u^T)^T. A is m x n, so A^T is n x m. */
     expr *u_transpose = new_transpose(u);
-    expr *left_matmul_node = new_left_matmul_dense(NULL, u_transpose, n, m, AT);
+    expr *left_matmul_node;
 
-    /* If parameterized, attach param_source and custom refresh */
     if (param_node != NULL)
     {
+        left_matmul_node =
+            new_left_matmul_dense(param_node, u_transpose, n, m, NULL);
         left_matmul_expr *lnode = (left_matmul_expr *) left_matmul_node;
-        lnode->param_source = param_node;
-        expr_retain(param_node);
+
+        /* swap the refresh function */
         lnode->refresh_param_values = refresh_dense_right;
     }
+    else
+    {
+        double *AT = (double *) SP_MALLOC(n * m * sizeof(double));
+        A_transpose(AT, data, m, n);
+        left_matmul_node = new_left_matmul_dense(NULL, u_transpose, n, m, AT);
+        free(AT);
+    }
 
-    expr *node = new_transpose(left_matmul_node);
-
-    free(AT);
-    return node;
+    return new_transpose(left_matmul_node);
 }
diff --git a/src/utils/dense_matrix.c b/src/utils/dense_matrix.c
@@ -67,7 +67,10 @@ Matrix *new_dense_matrix(int m, int n, const double *data)
     dm->base.update_values = dense_update_values;
     dm->base.free_fn = dense_free;
     dm->x = (double *) SP_MALLOC(m * n * sizeof(double));
-    memcpy(dm->x, data, m * n * sizeof(double));
+    if (data != NULL)
+    {
+        memcpy(dm->x, data, m * n * sizeof(double));
+    }
     dm->work = (double *) SP_MALLOC(n * sizeof(double));
     return &dm->base;
 }