diff --git a/include/atoms/affine.h b/include/atoms/affine.h
index 94afe8b..9527d3c 100644
--- a/include/atoms/affine.h
+++ b/include/atoms/affine.h
@@ -42,19 +42,31 @@ expr *new_diag_mat(expr *child);
 expr *new_upper_tri(expr *child);
 expr *new_transpose(expr *child);
 
-/* Left matrix multiplication: A @ f(x) where A is a constant or parameter
- * sparse matrix. param_node is NULL for fixed constants. */
+/* Left matrix multiplication: A @ f(x) where A is a constant sparse matrix.
+   Sparse parameters are not currently supported, so param_node must always
+   be NULL. */
 expr *new_left_matmul(expr *param_node, expr *u, const CSR_Matrix *A);
 
-/* Left matrix multiplication: A @ f(x) where A is a constant or parameter
- * dense matrix (row-major, m x n). Uses CBLAS for efficient computation. */
+/* Left matrix multiplication: A @ f(x) where A is either a constant dense
+   m x n matrix (row-major, with values given by 'data') or a parameter
+   representing a dense m x n matrix in row-major.
+
+   'data' supplies the matrix values only when param_node is NULL; when
+   param_node is non-NULL, data must be NULL. */
 expr *new_left_matmul_dense(expr *param_node, expr *u, int m, int n,
                             const double *data);
 
-/* Right matrix multiplication: f(x) @ A where A is a constant or parameter
- * matrix. */
+/* Right matrix multiplication: f(x) @ A where A is a constant sparse matrix.
+   Sparse parameters are not currently supported, so param_node must always
+   be NULL. */
 expr *new_right_matmul(expr *param_node, expr *u, const CSR_Matrix *A);
 
+/* Right matrix multiplication: f(x) @ A where A is either a constant dense
+   m x n matrix (row-major, with values given by 'data') or a parameter
+   representing a dense m x n matrix in row-major.
+
+   'data' supplies the matrix values only when param_node is NULL; when
+   param_node is non-NULL, data must be NULL. */
 expr *new_right_matmul_dense(expr *param_node, expr *u, int m, int n,
                              const double *data);
 
@@ -65,4 +77,8 @@ expr *new_scalar_mult(expr *param_node, expr *child);
  * param_node */
 expr *new_vector_mult(expr *param_node, expr *child);
 
+/* 1D full convolution: y = conv(param_node, child) where param_node is the
+   kernel and may represent either a constant or an updatable parameter. */
+expr *new_convolve(expr *param_node, expr *child);
+
 #endif /* AFFINE_H */
diff --git a/include/subexpr.h b/include/subexpr.h
index 9c57d23..f97feef 100644
--- a/include/subexpr.h
+++ b/include/subexpr.h
@@ -148,6 +148,21 @@ typedef struct vector_mult_expr
   expr *param_source;
 } vector_mult_expr;
 
+/* 1D convolution: y = conv(a, child) where a is a length-m kernel held by
+ * param_source. Output has size (m + n - 1) where n is the child length.
+ * Forward and wsum_hess backprop are computed as direct loops; for the
+ * Jacobian we materialize T(a) as a CSR once at jacobian_init and reuse
+ * the engine's block-left-mult machinery for composite children. */
+typedef struct convolve_expr
+{
+  expr base;
+  expr *param_source;     /* length-m kernel */
+  int m;                  /* kernel length */
+  int n;                  /* input length */
+  CSR_Matrix *T;          /* (m+n-1) x n convolution matrix */
+  CSC_Matrix *Jchild_CSC;
+} convolve_expr;
+
 /* Bivariate matrix multiplication: Z = f(u) @ g(u) where both children
  * may be composite expressions. 
*/
 typedef struct matmul_expr
diff --git a/include/utils/dense_matrix.h b/include/utils/dense_matrix.h
index ab3aea0..bd1693b 100644
--- a/include/utils/dense_matrix.h
+++ b/include/utils/dense_matrix.h
@@ -28,7 +28,8 @@ typedef struct Dense_Matrix
   double *work; /* scratch buffer, length n */
 } Dense_Matrix;
 
-/* Constructors */
+/* Constructors. If data is NULL, the value buffer is allocated but left
+   uninitialized; otherwise m*n entries are copied from data. */
 Matrix *new_dense_matrix(int m, int n, const double *data);
 
 /* Transpose helper */
diff --git a/include/utils/mini_numpy.h b/include/utils/mini_numpy.h
index 1011de3..c87f80a 100644
--- a/include/utils/mini_numpy.h
+++ b/include/utils/mini_numpy.h
@@ -18,6 +18,8 @@
 #ifndef MINI_NUMPY_H
 #define MINI_NUMPY_H
 
+#include "utils/CSR_Matrix.h"
+
 /* Example: a = [1, 2], len = 2, repeats = 3, result = [1, 1, 1, 2, 2, 2] */
 void repeat(double *result, const double *a, int len, int repeats);
 
@@ -41,4 +43,13 @@ void Y_kron_I_vec(int m, int k, int n, const double *Y, const double *w, double
    compute v = vec(X^T @ W). */
 void I_kron_XT_vec(int m, int k, int n, const double *X, const double *w, double *v);
 
+/* Fill T_csr's row pointers and column indices for the 1D full-convolution
+   Toeplitz matrix T(a), sized (m+n-1) x n with m*n nonzeros. Values (x) are
+   not written; call conv_matrix_fill_values to populate them. */
+void conv_matrix_fill_sparsity(CSR_Matrix *T_csr, int m, int n);
+
+/* Overwrite T_csr->x from kernel a, using the sparsity already written by
+   conv_matrix_fill_sparsity. T[r, col] = a[r - col]. */
+void conv_matrix_fill_values(CSR_Matrix *T_csr, const double *a);
+
 #endif /* MINI_NUMPY_H */
diff --git a/src/atoms/affine/convolve.c b/src/atoms/affine/convolve.c
new file mode 100644
index 0000000..55e5857
--- /dev/null
+++ b/src/atoms/affine/convolve.c
@@ -0,0 +1,194 @@
+/*
+ * Copyright 2026 Daniel Cederberg and William Zhang
+ *
+ * This file is part of the SparseDiffEngine project.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "atoms/affine.h"
+#include "subexpr.h"
+#include "utils/CSR_Matrix.h"
+#include "utils/linalg_sparse_matmuls.h"
+#include "utils/mini_numpy.h"
+#include "utils/tracked_alloc.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* 1D full convolution: y = conv(a, child), where a is a length-m kernel
+ * held by param_source and child is a length-n vector. Output length is
+ * m + n - 1. In matrix form, y = T(a) @ child, where T(a) is the
+ * (m+n-1) x n Toeplitz/convolution matrix with T(a)[k, j] = a[k-j] when
+ * 0 <= k-j < m. 
*/ + +static void forward(expr *node, const double *u) +{ + expr *child = node->left; + convolve_expr *cnode = (convolve_expr *) node; + + if (cnode->base.needs_parameter_refresh) + { + cnode->param_source->forward(cnode->param_source, NULL); + /* refresh the convolution matrix values if it exists (necessary to check + for null in case someone calls forward before initializing the jacobian, + which might happen when we expose Python bindings to SparseDiffEngine) */ + if (cnode->T != NULL) + { + conv_matrix_fill_values(cnode->T, cnode->param_source->value); + } + cnode->base.needs_parameter_refresh = false; + } + + child->forward(child, u); + + const double *a = cnode->param_source->value; + const double *x = child->value; + double *y = node->value; + + memset(y, 0, node->size * sizeof(double)); + for (int j = 0; j < cnode->n; j++) + { + for (int i = 0; i < cnode->m; i++) + { + y[i + j] += a[i] * x[j]; + } + } +} + +static void jacobian_init_impl(expr *node) +{ + expr *child = node->left; + convolve_expr *cnode = (convolve_expr *) node; + int m = cnode->m; + int n = cnode->n; + const double *a = cnode->param_source->value; + + jacobian_init(child); + + /* Build convolution matrix of size (m+n-1) x n with m*n nonzeros */ + cnode->T = new_csr_matrix(m + n - 1, n, m * n); + conv_matrix_fill_sparsity(cnode->T, m, n); + conv_matrix_fill_values(cnode->T, a); + + /* J = T @ J_child */ + cnode->Jchild_CSC = csr_to_csc_alloc(child->jacobian, node->work->iwork); + node->jacobian = csr_csc_matmul_alloc(cnode->T, cnode->Jchild_CSC); +} + +static void eval_jacobian(expr *node) +{ + expr *child = node->left; + convolve_expr *cnode = (convolve_expr *) node; + + child->eval_jacobian(child); + + /* J = T @ J_child */ + csr_to_csc_fill_values(child->jacobian, cnode->Jchild_CSC, node->work->iwork); + csr_csc_matmul_fill_values(cnode->T, cnode->Jchild_CSC, node->jacobian); +} + +static void wsum_hess_init_impl(expr *node) +{ + expr *child = node->left; + convolve_expr *cnode = (convolve_expr *) node; + + wsum_hess_init(child); + node->wsum_hess = new_csr_copy_sparsity(child->wsum_hess); + node->work->dwork = (double *) SP_MALLOC(cnode->n * sizeof(double)); +} + +static void eval_wsum_hess(expr *node, const double *w) +{ + expr *child = node->left; + convolve_expr *cnode = (convolve_expr *) node; + const double *a = cnode->param_source->value; + double *w_prime = node->work->dwork; + + /* w' = conv_matrix.T w, so w'[j] = sum_{i=0..m-1} a[i] * w[i + j]. */ + for (int j = 0; j < cnode->n; j++) + { + double sum = 0.0; + for (int i = 0; i < cnode->m; i++) + { + sum += a[i] * w[i + j]; + } + w_prime[j] = sum; + } + + child->eval_wsum_hess(child, w_prime); + memcpy(node->wsum_hess->x, child->wsum_hess->x, + child->wsum_hess->nnz * sizeof(double)); +} + +static bool is_affine(const expr *node) +{ + return node->left->is_affine(node->left); +} + +static void free_type_data(expr *node) +{ + convolve_expr *cnode = (convolve_expr *) node; + free_csr_matrix(cnode->T); + free_csc_matrix(cnode->Jchild_CSC); + free_expr(cnode->param_source); +} + +expr *new_convolve(expr *param_node, expr *child) +{ + /* Accept both (n, 1) column and (1, n) row shapes — numpy-style 1D + arrays reach us as (1, n) through the Python bindings, and the math + is shape-agnostic (flat buffers of size m + n - 1). Output shape + matches the input orientation. 
*/
+  int m = param_node->size;
+  int n, d1, d2;
+  if (child->d2 == 1)
+  {
+    n = child->d1;
+    d1 = m + n - 1;
+    d2 = 1;
+  }
+  else if (child->d1 == 1)
+  {
+    n = child->d2;
+    d1 = 1;
+    d2 = m + n - 1;
+  }
+  else
+  {
+    fprintf(stderr, "Error in new_convolve: child must be a vector "
+                    "(shape (n, 1) or (1, n))\n");
+    exit(1);
+  }
+
+  convolve_expr *cnode = (convolve_expr *) SP_CALLOC(1, sizeof(convolve_expr));
+  expr *node = &cnode->base;
+  init_expr(node, d1, d2, child->n_vars, forward, jacobian_init_impl,
+            eval_jacobian, is_affine, wsum_hess_init_impl, eval_wsum_hess,
+            free_type_data);
+  node->left = child;
+  expr_retain(child);
+
+  cnode->param_source = param_node;
+  expr_retain(param_node);
+  cnode->m = m;
+  cnode->n = n;
+
+  /* iwork is used for csr_to_csc of child's Jacobian (size n_vars). */
+  node->work->iwork = (int *) SP_MALLOC(node->n_vars * sizeof(int));
+
+  /* Ensure the first forward() pulls current param values through any
+     broadcast/promote wrappers and reflects them in T (once T is built). */
+  cnode->base.needs_parameter_refresh = true;
+
+  return node;
+}
diff --git a/src/atoms/affine/left_matmul.c b/src/atoms/affine/left_matmul.c
index 2928d23..c083385 100644
--- a/src/atoms/affine/left_matmul.c
+++ b/src/atoms/affine/left_matmul.c
@@ -276,15 +276,35 @@ expr *new_left_matmul_dense(expr *param_node, expr *u, int m, int n,
   lnode->csc_to_csr_work = (int *) SP_MALLOC(node->size * sizeof(int));
   lnode->n_blocks = n_blocks;
 
-  lnode->A = new_dense_matrix(m, n, data);
-  lnode->AT = dense_matrix_trans((const Dense_Matrix *) lnode->A);
-
   /* parameter support */
   lnode->param_source = param_node;
   if (param_node != NULL)
   {
+    if (data != NULL)
+    {
+      fprintf(stderr, "Error in new_left_matmul_dense: data must be NULL when param_node is given\n");
+      exit(1);
+    }
+
     expr_retain(param_node);
     lnode->refresh_param_values = refresh_dense_left;
+
+    /* A and AT buffers are filled by refresh_dense_left from the parameter. */
+    lnode->A = new_dense_matrix(m, n, NULL);
+    lnode->AT = new_dense_matrix(n, m, NULL);
+    node->needs_parameter_refresh = true;
+  }
+  /* constant matrix case */
+  else
+  {
+    if (data == NULL)
+    {
+      fprintf(stderr, "Error in new_left_matmul_dense: data must be non-NULL for a constant matrix\n");
+      exit(1);
+    }
+
+    lnode->A = new_dense_matrix(m, n, data);
+    lnode->AT = dense_matrix_trans((const Dense_Matrix *) lnode->A);
   }
 
   return node;
diff --git a/src/atoms/affine/right_matmul.c b/src/atoms/affine/right_matmul.c
index 56075dc..c3344f2 100644
--- a/src/atoms/affine/right_matmul.c
+++ b/src/atoms/affine/right_matmul.c
@@ -82,24 +82,32 @@ expr *new_right_matmul(expr *param_node, expr *u, const CSR_Matrix *A)
 expr *new_right_matmul_dense(expr *param_node, expr *u, int m, int n,
                              const double *data)
 {
-  /* We express: u @ A = (A^T @ u^T)^T. A is m x n, so A^T is n x m. */
-  double *AT = (double *) SP_MALLOC(n * m * sizeof(double));
-  A_transpose(AT, data, m, n);
+  if ((param_node != NULL && data != NULL) || (param_node == NULL && data == NULL))
+  {
+    fprintf(stderr, "Error in new_right_matmul_dense: exactly one of param_node and data must be non-NULL\n");
+    exit(1);
+  }
 
+  /* We express: u @ A = (A^T @ u^T)^T. A is m x n, so A^T is n x m. 
*/ expr *u_transpose = new_transpose(u); - expr *left_matmul_node = new_left_matmul_dense(NULL, u_transpose, n, m, AT); + expr *left_matmul_node; - /* If parameterized, attach param_source and custom refresh */ if (param_node != NULL) { + left_matmul_node = + new_left_matmul_dense(param_node, u_transpose, n, m, NULL); left_matmul_expr *lnode = (left_matmul_expr *) left_matmul_node; - lnode->param_source = param_node; - expr_retain(param_node); + + /* swap the refresh function */ lnode->refresh_param_values = refresh_dense_right; } + else + { + double *AT = (double *) SP_MALLOC(n * m * sizeof(double)); + A_transpose(AT, data, m, n); + left_matmul_node = new_left_matmul_dense(NULL, u_transpose, n, m, AT); + free(AT); + } - expr *node = new_transpose(left_matmul_node); - - free(AT); - return node; + return new_transpose(left_matmul_node); } diff --git a/src/utils/dense_matrix.c b/src/utils/dense_matrix.c index a521f15..5c628c2 100644 --- a/src/utils/dense_matrix.c +++ b/src/utils/dense_matrix.c @@ -67,7 +67,10 @@ Matrix *new_dense_matrix(int m, int n, const double *data) dm->base.update_values = dense_update_values; dm->base.free_fn = dense_free; dm->x = (double *) SP_MALLOC(m * n * sizeof(double)); - memcpy(dm->x, data, m * n * sizeof(double)); + if (data != NULL) + { + memcpy(dm->x, data, m * n * sizeof(double)); + } dm->work = (double *) SP_MALLOC(n * sizeof(double)); return &dm->base; } diff --git a/src/utils/mini_numpy.c b/src/utils/mini_numpy.c index aa5f862..baa00c5 100644 --- a/src/utils/mini_numpy.c +++ b/src/utils/mini_numpy.c @@ -100,3 +100,31 @@ void I_kron_XT_vec(int m, int k, int n, const double *X, const double *w, double } } } + +void conv_matrix_fill_sparsity(CSR_Matrix *T_csr, int m, int n) +{ + int nnz = 0; + for (int r = 0; r < T_csr->m; r++) + { + T_csr->p[r] = nnz; + int col_lo = r - m + 1 > 0 ? r - m + 1 : 0; + int col_hi = r < n - 1 ? 
r : n - 1; + for (int col = col_lo; col <= col_hi; col++) + { + T_csr->i[nnz] = col; + nnz++; + } + } + T_csr->p[T_csr->m] = nnz; +} + +void conv_matrix_fill_values(CSR_Matrix *T_csr, const double *a) +{ + for (int r = 0; r < T_csr->m; r++) + { + for (int k = T_csr->p[r]; k < T_csr->p[r + 1]; k++) + { + T_csr->x[k] = a[r - T_csr->i[k]]; + } + } +} diff --git a/tests/all_tests.c b/tests/all_tests.c index efaad33..76807b6 100644 --- a/tests/all_tests.c +++ b/tests/all_tests.c @@ -7,6 +7,7 @@ #ifndef PROFILE_ONLY #include "forward_pass/affine/test_add.h" #include "forward_pass/affine/test_broadcast.h" +#include "forward_pass/affine/test_convolve.h" #include "forward_pass/affine/test_diag_mat.h" #include "forward_pass/affine/test_hstack.h" #include "forward_pass/affine/test_left_matmul_dense.h" @@ -25,6 +26,7 @@ #include "forward_pass/other/test_prod_axis_one.h" #include "forward_pass/other/test_prod_axis_zero.h" #include "jacobian_tests/affine/test_broadcast.h" +#include "jacobian_tests/affine/test_convolve.h" #include "jacobian_tests/affine/test_diag_mat.h" #include "jacobian_tests/affine/test_hstack.h" #include "jacobian_tests/affine/test_index.h" @@ -65,6 +67,7 @@ #include "utils/test_linalg_utils_matmul_chain_rule.h" #include "utils/test_matrix.h" #include "wsum_hess/affine/test_broadcast.h" +#include "wsum_hess/affine/test_convolve.h" #include "wsum_hess/affine/test_diag_mat.h" #include "wsum_hess/affine/test_hstack.h" #include "wsum_hess/affine/test_index.h" @@ -134,6 +137,9 @@ int main(void) mu_run_test(test_forward_prod_axis_one, tests_run); mu_run_test(test_matmul, tests_run); mu_run_test(test_left_matmul_dense, tests_run); + mu_run_test(test_convolve_forward, tests_run); + mu_run_test(test_convolve_forward_row, tests_run); + mu_run_test(test_convolve_forward_param, tests_run); mu_run_test(test_diag_mat_forward, tests_run); mu_run_test(test_upper_tri_forward_4x4, tests_run); @@ -215,6 +221,8 @@ int main(void) mu_run_test(test_jacobian_right_matmul_log, tests_run); mu_run_test(test_jacobian_right_matmul_log_vector, tests_run); mu_run_test(test_jacobian_matmul, tests_run); + mu_run_test(test_jacobian_convolve, tests_run); + mu_run_test(test_jacobian_convolve_composite, tests_run); mu_run_test(test_jacobian_transpose, tests_run); mu_run_test(test_diag_mat_jacobian_variable, tests_run); mu_run_test(test_diag_mat_jacobian_of_log, tests_run); @@ -280,6 +288,8 @@ int main(void) mu_run_test(test_wsum_hess_matmul_yx, tests_run); mu_run_test(test_wsum_hess_right_matmul, tests_run); mu_run_test(test_wsum_hess_right_matmul_vector, tests_run); + mu_run_test(test_wsum_hess_convolve, tests_run); + mu_run_test(test_wsum_hess_convolve_composite, tests_run); mu_run_test(test_wsum_hess_broadcast_row, tests_run); mu_run_test(test_wsum_hess_broadcast_col, tests_run); mu_run_test(test_wsum_hess_broadcast_scalar_to_matrix, tests_run); @@ -374,6 +384,7 @@ int main(void) mu_run_test(test_param_shared_left_matmul_problem, tests_run); mu_run_test(test_param_fixed_skip_in_update, tests_run); mu_run_test(test_param_scalar_mult_problem_with_constant, tests_run); + mu_run_test(test_param_convolve_problem, tests_run); printf("\n--- Parameter + Broadcast Tests ---\n"); mu_run_test(test_constant_broadcast_vector_mult, tests_run); @@ -384,6 +395,7 @@ int main(void) mu_run_test(test_param_sum_scalar_mult, tests_run); mu_run_test(test_const_hstack_left_matmul, tests_run); mu_run_test(test_param_hstack_left_matmul, tests_run); + mu_run_test(test_param_scalar_mult_convolve, tests_run); #endif /* PROFILE_ONLY */ #ifdef 
PROFILE_ONLY
diff --git a/tests/forward_pass/affine/test_convolve.h b/tests/forward_pass/affine/test_convolve.h
new file mode 100644
index 0000000..14d5988
--- /dev/null
+++ b/tests/forward_pass/affine/test_convolve.h
@@ -0,0 +1,77 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "atoms/affine.h"
+#include "expr.h"
+#include "minunit.h"
+#include "subexpr.h"
+#include "test_helpers.h"
+
+const char *test_convolve_forward(void)
+{
+  /* Test: y = conv([1, 2, 3], x) where x = [1, 2, 3].
+   * Expected y = [1, 4, 10, 12, 9]. */
+  double kernel[3] = {1.0, 2.0, 3.0};
+  expr *kernel_param = new_parameter(3, 1, PARAM_FIXED, 3, kernel);
+  expr *x = new_variable(3, 1, 0, 3);
+  expr *y = new_convolve(kernel_param, x);
+
+  double u[3] = {1.0, 2.0, 3.0};
+  y->forward(y, u);
+
+  double expected[5] = {1.0, 4.0, 10.0, 12.0, 9.0};
+
+  mu_assert("convolve result should have d1=5", y->d1 == 5);
+  mu_assert("convolve result should have d2=1", y->d2 == 1);
+  mu_assert("convolve result should have size=5", y->size == 5);
+  mu_assert("Convolve forward pass test failed",
+            cmp_double_array(y->value, expected, 5));
+
+  free_expr(y);
+  return 0;
+}
+
+const char *test_convolve_forward_row(void)
+{
+  /* Same as test_convolve_forward but x is a (1, 3) row vector.
+   * Output orientation should match: (1, 5). */
+  double kernel[3] = {1.0, 2.0, 3.0};
+  expr *kernel_param = new_parameter(3, 1, PARAM_FIXED, 3, kernel);
+  expr *x = new_variable(1, 3, 0, 3);
+  expr *y = new_convolve(kernel_param, x);
+
+  double u[3] = {1.0, 2.0, 3.0};
+  y->forward(y, u);
+
+  double expected[5] = {1.0, 4.0, 10.0, 12.0, 9.0};
+
+  mu_assert("convolve row result should have d1=1", y->d1 == 1);
+  mu_assert("convolve row result should have d2=5", y->d2 == 5);
+  mu_assert("convolve row result should have size=5", y->size == 5);
+  mu_assert("Convolve forward pass (row vector) failed",
+            cmp_double_array(y->value, expected, 5));
+
+  free_expr(y);
+  return 0;
+}
+
+const char *test_convolve_forward_param(void)
+{
+  /* Same as test_convolve_forward, but the kernel is an updatable parameter
+   * (param_id = 0). Exercises the needs_parameter_refresh path in forward(). */
+  double kernel[3] = {1.0, 2.0, 3.0};
+  expr *kernel_param = new_parameter(3, 1, 0, 3, kernel);
+  expr *x = new_variable(3, 1, 0, 3);
+  expr *y = new_convolve(kernel_param, x);
+
+  double u[3] = {1.0, 2.0, 3.0};
+  y->forward(y, u);
+
+  double expected[5] = {1.0, 4.0, 10.0, 12.0, 9.0};
+
+  mu_assert("Convolve forward pass with param kernel failed",
+            cmp_double_array(y->value, expected, 5));
+
+  free_expr(y);
+  return 0;
+}
diff --git a/tests/jacobian_tests/affine/test_convolve.h b/tests/jacobian_tests/affine/test_convolve.h
new file mode 100644
index 0000000..c3aae98
--- /dev/null
+++ b/tests/jacobian_tests/affine/test_convolve.h
@@ -0,0 +1,68 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "atoms/affine.h"
+#include "atoms/elementwise_restricted_dom.h"
+#include "expr.h"
+#include "minunit.h"
+#include "numerical_diff.h"
+#include "subexpr.h"
+#include "test_helpers.h"
+
+const char *test_jacobian_convolve(void)
+{
+  /* Test: Jacobian of y = conv([1, 2, 3], x) for x = Variable(3).
+   * J should equal T(kernel):
+   *   [[1, 0, 0],
+   *    [2, 1, 0],
+   *    [3, 2, 1],
+   *    [0, 3, 2],
+   *    [0, 0, 3]]
+   * stored in CSR with nnz = 9, shape 5 x 3. 
*/ + double kernel[3] = {1.0, 2.0, 3.0}; + expr *kernel_param = new_parameter(3, 1, PARAM_FIXED, 3, kernel); + expr *x = new_variable(3, 1, 0, 3); + expr *y = new_convolve(kernel_param, x); + + double u[3] = {1.0, 2.0, 3.0}; + y->forward(y, u); + jacobian_init(y); + y->eval_jacobian(y); + + mu_assert("Jacobian should have 5 rows", y->jacobian->m == 5); + mu_assert("Jacobian should have 3 columns", y->jacobian->n == 3); + mu_assert("Jacobian should have 9 nonzeros", y->jacobian->nnz == 9); + + int expected_p[6] = {0, 1, 3, 6, 8, 9}; + int expected_i[9] = {0, 0, 1, 0, 1, 2, 1, 2, 2}; + double expected_x[9] = {1.0, 2.0, 1.0, 3.0, 2.0, 1.0, 3.0, 2.0, 3.0}; + + mu_assert("Convolve Jacobian row pointers incorrect", + cmp_int_array(y->jacobian->p, expected_p, 6)); + mu_assert("Convolve Jacobian column indices incorrect", + cmp_int_array(y->jacobian->i, expected_i, 9)); + mu_assert("Convolve Jacobian values incorrect", + cmp_double_array(y->jacobian->x, expected_x, 9)); + + free_expr(y); + return 0; +} + +const char *test_jacobian_convolve_composite(void) +{ + /* y = conv([1, 2, 3], log(x)) — verify via numerical differentiation that + * the composite path (T(a) @ J_log) matches. */ + double kernel[3] = {1.0, 2.0, 3.0}; + double x_vals[3] = {1.0, 2.0, 3.0}; + + expr *kernel_param = new_parameter(3, 1, PARAM_FIXED, 3, kernel); + expr *x = new_variable(3, 1, 0, 3); + expr *log_x = new_log(x); + expr *y = new_convolve(kernel_param, log_x); + + mu_assert("check_jacobian failed", + check_jacobian_num(y, x_vals, NUMERICAL_DIFF_DEFAULT_H)); + + free_expr(y); + return 0; +} diff --git a/tests/problem/test_param_broadcast.h b/tests/problem/test_param_broadcast.h index 032e8eb..377121a 100644 --- a/tests/problem/test_param_broadcast.h +++ b/tests/problem/test_param_broadcast.h @@ -269,9 +269,7 @@ const char *test_const_hstack_left_matmul(void) expr *p2 = new_parameter(1, n, PARAM_FIXED, 2 * n, p2_vals); expr *param_nodes[2] = {p1, p2}; expr *p_hstack = new_hstack(param_nodes, 2, 2 * n); - /* pass concatenated parameter vectors */ - double A_data[8] = {1.0, 2.0, 3.0, 0.0, 4.0, 0.0, 5.0, 6.0}; - expr *objective = new_left_matmul_dense(p_hstack, x, 1, 2 * n, A_data); + expr *objective = new_left_matmul_dense(p_hstack, x, 1, 2 * n, NULL); problem *prob = new_problem(objective, NULL, 0, false); problem_init_derivatives(prob); @@ -304,9 +302,7 @@ const char *test_param_hstack_left_matmul(void) expr *p2 = new_parameter(1, n, n, 2 * n, p2_vals); expr *param_nodes[2] = {p1, p2}; expr *p_hstack = new_hstack(param_nodes, 2, 2 * n); - /* pass concatenated parameter vectors */ - double A_data[8] = {1.0, 2.0, 3.0, 0.0, 4.0, 0.0, 5.0, 6.0}; - expr *objective = new_left_matmul_dense(p_hstack, x, 1, 2 * n, A_data); + expr *objective = new_left_matmul_dense(p_hstack, x, 1, 2 * n, NULL); problem *prob = new_problem(objective, NULL, 0, false); problem_register_params(prob, param_nodes, 2); @@ -341,4 +337,68 @@ const char *test_param_hstack_left_matmul(void) return 0; } +const char *test_param_scalar_mult_convolve(void) +{ + int n = 3; + + /* minimize sum(x) subject to conv(s * kernel, x), where kernel is a fixed + length-4 vector and s is an updatable scalar parameter. Scaling s + rescales the entire kernel, so Jacobian and constraint values scale + linearly with s. 
*/ + expr *x = new_variable(3, 1, 0, n); + expr *objective = new_sum(x, -1); + + double kernel_vals[4] = {1.0, 2.0, 3.0, 4.0}; + expr *kernel = new_parameter(4, 1, PARAM_FIXED, n, kernel_vals); + double s_vals[1] = {1.0}; + expr *s = new_parameter(1, 1, 0, n, s_vals); + expr *scaled_kernel = new_scalar_mult(s, kernel); + + expr *constraint = new_convolve(scaled_kernel, x); + expr *constraints[1] = {constraint}; + problem *prob = new_problem(objective, constraints, 1, false); + + expr *param_nodes[1] = {s}; + problem_register_params(prob, param_nodes, 1); + problem_init_derivatives(prob); + + /* point for evaluating and sparsity arrays (T is 6x3 with 12 nonzeros) */ + double x_vals[3] = {1.0, 2.0, 3.0}; + int Ap[7] = {0, 1, 3, 6, 9, 11, 12}; + int Ai[12] = {0, 0, 1, 0, 1, 2, 0, 1, 2, 1, 2, 2}; + + /* test 1: s = 1, effective kernel = [1, 2, 3, 4] */ + problem_constraint_forward(prob, x_vals); + problem_jacobian(prob); + double constrs[6] = {1.0, 4.0, 10.0, 16.0, 17.0, 12.0}; + double Ax[12] = {1.0, 2.0, 1.0, 3.0, 2.0, 1.0, 4.0, 3.0, 2.0, 4.0, 3.0, 4.0}; + mu_assert("vals fail", cmp_double_array(prob->constraint_values, constrs, 6)); + mu_assert("rows fail", cmp_int_array(prob->jacobian->p, Ap, 7)); + mu_assert("cols fail", cmp_int_array(prob->jacobian->i, Ai, 12)); + mu_assert("vals fail", cmp_double_array(prob->jacobian->x, Ax, 12)); + + mu_assert("check_jacobian failed", + check_jacobian_num(constraint, x_vals, NUMERICAL_DIFF_DEFAULT_H)); + + /* test 2: s = 10 via problem_update_params, effective kernel = [10,20,30,40] */ + double theta[1] = {10.0}; + problem_update_params(prob, theta); + problem_constraint_forward(prob, x_vals); + problem_jacobian(prob); + double updated_constrs[6] = {10.0, 40.0, 100.0, 160.0, 170.0, 120.0}; + double updated_Ax[12] = {10.0, 20.0, 10.0, 30.0, 20.0, 10.0, + 40.0, 30.0, 20.0, 40.0, 30.0, 40.0}; + mu_assert("vals fail", + cmp_double_array(prob->constraint_values, updated_constrs, 6)); + mu_assert("rows fail", cmp_int_array(prob->jacobian->p, Ap, 7)); + mu_assert("cols fail", cmp_int_array(prob->jacobian->i, Ai, 12)); + mu_assert("vals fail", cmp_double_array(prob->jacobian->x, updated_Ax, 12)); + + mu_assert("check_jacobian failed", + check_jacobian_num(constraint, x_vals, NUMERICAL_DIFF_DEFAULT_H)); + + free_problem(prob); + return 0; +} + #endif /* TEST_PARAM_BROADCAST_H */ diff --git a/tests/problem/test_param_prob.h b/tests/problem/test_param_prob.h index 40350d6..a638b31 100644 --- a/tests/problem/test_param_prob.h +++ b/tests/problem/test_param_prob.h @@ -6,6 +6,7 @@ #include #include "atoms/affine.h" +#include "atoms/elementwise_full_dom.h" #include "atoms/elementwise_restricted_dom.h" #include "expr.h" #include "minunit.h" @@ -166,12 +167,11 @@ const char *test_param_left_matmul_problem(void) /* minimize sum(x) subject to Ax = ?, with A parameter */ expr *x = new_variable(2, 1, 0, n); expr *objective = new_sum(x, -1); - double theta[4] = {1.0, 0.0, 0.0, 1.0}; + /* column-major of A = [[2,4],[0,1]] */ + double theta[4] = {2.0, 0.0, 4.0, 1.0}; expr *A_param = new_parameter(2, 2, 0, n, theta); - /* dense 2x2 matrix */ - double Ax[4] = {2.0, 0.0, 0.0, 1.0}; - expr *constraint = new_left_matmul_dense(A_param, x, 2, 2, Ax); + expr *constraint = new_left_matmul_dense(A_param, x, 2, 2, NULL); expr *constraints[1] = {constraint}; problem *prob = new_problem(objective, constraints, 1, false); @@ -187,7 +187,8 @@ const char *test_param_left_matmul_problem(void) /* test 1: initial jacobian */ problem_constraint_forward(prob, x_vals); - double constrs[2] = {2.0, 
2.0}; + double constrs[2] = {10.0, 2.0}; + double Ax[4] = {2.0, 4.0, 0.0, 1.0}; problem_jacobian(prob); mu_assert("vals fail", cmp_double_array(prob->constraint_values, constrs, 2)); mu_assert("vals fail", cmp_double_array(prob->jacobian->x, Ax, 4)); @@ -226,12 +227,11 @@ const char *test_param_right_matmul_problem(void) /* minimize sum(x) subject to xA = ?, with A parameter */ expr *x = new_variable(1, 2, 0, n); expr *objective = new_sum(x, -1); - double theta[4] = {1.0, 0.0, 0.0, 1.0}; + /* column-major of A = [[2,0],[0,1]] */ + double theta[4] = {2.0, 0.0, 0.0, 1.0}; expr *A_param = new_parameter(2, 2, 0, n, theta); - /* dense 2x2 matrix */ - double Ax[4] = {2.0, 0.0, 0.0, 1.0}; - expr *constraint = new_right_matmul_dense(A_param, x, 2, 2, Ax); + expr *constraint = new_right_matmul_dense(A_param, x, 2, 2, NULL); expr *constraints[1] = {constraint}; problem *prob = new_problem(objective, constraints, 1, false); @@ -248,6 +248,7 @@ const char *test_param_right_matmul_problem(void) /* test 1: initial jacobian */ problem_constraint_forward(prob, x_vals); double constrs[2] = {2.0, 2.0}; + double Ax[4] = {2.0, 0.0, 0.0, 1.0}; problem_jacobian(prob); mu_assert("vals fail", cmp_double_array(prob->constraint_values, constrs, 2)); mu_assert("vals fail", cmp_double_array(prob->jacobian->x, Ax, 4)); @@ -342,12 +343,11 @@ const char *test_param_left_matmul_rectangular(void) /* minimize sum(x) subject to Ax = ?, with A parameter (3x2) */ expr *x = new_variable(2, 1, 0, n); expr *objective = new_sum(x, -1); - double theta[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; + /* column-major of A = [[1,2],[3,4],[5,6]] */ + double theta[6] = {1.0, 3.0, 5.0, 2.0, 4.0, 6.0}; expr *A_param = new_parameter(3, 2, 0, n, theta); - /* dense 3x2 matrix */ - double Ax[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; - expr *constraint = new_left_matmul_dense(A_param, x, 3, 2, Ax); + expr *constraint = new_left_matmul_dense(A_param, x, 3, 2, NULL); expr *constraints[1] = {constraint}; problem *prob = new_problem(objective, constraints, 1, false); @@ -364,6 +364,7 @@ const char *test_param_left_matmul_rectangular(void) /* test 1: initial jacobian */ problem_constraint_forward(prob, x_vals); double constrs[3] = {5.0, 11.0, 17.0}; + double Ax[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; problem_jacobian(prob); mu_assert("vals fail", cmp_double_array(prob->constraint_values, constrs, 3)); mu_assert("vals fail", cmp_double_array(prob->jacobian->x, Ax, 6)); @@ -407,12 +408,11 @@ const char *test_param_right_matmul_rectangular(void) /* minimize sum(x) subject to xA = ?, with A parameter (2x3) */ expr *x = new_variable(1, 2, 0, n); expr *objective = new_sum(x, -1); - double theta[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; + /* column-major of A = [[1,2,3],[4,5,6]] */ + double theta[6] = {1.0, 4.0, 2.0, 5.0, 3.0, 6.0}; expr *A_param = new_parameter(2, 3, 0, n, theta); - /* dense 2x3 matrix */ - double Ax[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; - expr *constraint = new_right_matmul_dense(A_param, x, 2, 3, Ax); + expr *constraint = new_right_matmul_dense(A_param, x, 2, 3, NULL); expr *constraints[1] = {constraint}; problem *prob = new_problem(objective, constraints, 1, false); @@ -477,11 +477,9 @@ const char *test_param_shared_left_matmul_problem(void) double theta[4] = {1.0, 0.0, 0.0, 1.0}; expr *A_param = new_parameter(2, 2, 0, n, theta); - /* dense 2x2 identity */ - double Ax[4] = {1.0, 0.0, 0.0, 1.0}; expr *constraints[2]; - constraints[0] = new_left_matmul_dense(A_param, x, 2, 2, Ax); - constraints[1] = new_left_matmul_dense(A_param, y, 2, 2, Ax); + 
constraints[0] = new_left_matmul_dense(A_param, x, 2, 2, NULL); + constraints[1] = new_left_matmul_dense(A_param, y, 2, 2, NULL); problem *prob = new_problem(objective, constraints, 2, false); /* register parameters and fill sparsity patterns */ @@ -535,4 +533,89 @@ const char *test_param_shared_left_matmul_problem(void) return 0; } +const char *test_param_convolve_problem(void) +{ + int n = 3; + + /* minimize sum(x) subject to conv(a, exp(x)), with kernel a as parameter. + The exp nonlinearity gives a non-zero Hessian that depends on the kernel, + so both Jacobian and Hessian refresh paths get exercised on + problem_update_params. At x = [0, 0, 0] we have exp(x) = 1, so J = T + and H diagonal = T^T @ 1_5 = sum(a) per column. */ + expr *x = new_variable(3, 1, 0, n); + expr *exp_x = new_exp(x); + expr *objective = new_sum(x, -1); + double theta[3] = {1.0, 2.0, 3.0}; + expr *a_param = new_parameter(3, 1, 0, n, theta); + expr *constraint = new_convolve(a_param, exp_x); + expr *constraints[1] = {constraint}; + problem *prob = new_problem(objective, constraints, 1, true); + + /* register parameters and fill sparsity patterns */ + expr *param_nodes[1] = {a_param}; + problem_register_params(prob, param_nodes, 1); + problem_init_derivatives(prob); + + /* point for evaluating and constant sparsity arrays */ + double x_vals[3] = {0.0, 0.0, 0.0}; + double w[5] = {1.0, 1.0, 1.0, 1.0, 1.0}; + int Ap[6] = {0, 1, 3, 6, 8, 9}; + int Ai[9] = {0, 0, 1, 0, 1, 2, 1, 2, 2}; + int Hp[4] = {0, 1, 2, 3}; + int Hi[3] = {0, 1, 2}; + + /* test 1: kernel a = [1, 2, 3] */ + problem_constraint_forward(prob, x_vals); + problem_jacobian(prob); + problem_hessian(prob, 0.0, w); + double constrs[5] = {1.0, 3.0, 6.0, 5.0, 3.0}; + double Ax[9] = {1.0, 2.0, 1.0, 3.0, 2.0, 1.0, 3.0, 2.0, 3.0}; + double Hx[3] = {6.0, 6.0, 6.0}; + mu_assert("vals fail", cmp_double_array(prob->constraint_values, constrs, 5)); + mu_assert("rows fail", cmp_int_array(prob->jacobian->p, Ap, 6)); + mu_assert("cols fail", cmp_int_array(prob->jacobian->i, Ai, 9)); + mu_assert("vals fail", cmp_double_array(prob->jacobian->x, Ax, 9)); + mu_assert("hess rows fail", cmp_int_array(prob->lagrange_hessian->p, Hp, 4)); + mu_assert("hess cols fail", cmp_int_array(prob->lagrange_hessian->i, Hi, 3)); + mu_assert("hess vals fail", cmp_double_array(prob->lagrange_hessian->x, Hx, 3)); + + /* test 2: kernel a = [4, 5, 6] — refresh path must rebuild T(a) values + and propagate to both Jacobian and Hessian. 
*/
+  theta[0] = 4.0;
+  theta[1] = 5.0;
+  theta[2] = 6.0;
+  problem_update_params(prob, theta);
+  problem_constraint_forward(prob, x_vals);
+  problem_jacobian(prob);
+  problem_hessian(prob, 0.0, w);
+  constrs[0] = 4.0;
+  constrs[1] = 9.0;
+  constrs[2] = 15.0;
+  constrs[3] = 11.0;
+  constrs[4] = 6.0;
+  Ax[0] = 4.0;
+  Ax[1] = 5.0;
+  Ax[2] = 4.0;
+  Ax[3] = 6.0;
+  Ax[4] = 5.0;
+  Ax[5] = 4.0;
+  Ax[6] = 6.0;
+  Ax[7] = 5.0;
+  Ax[8] = 6.0;
+  Hx[0] = 15.0;
+  Hx[1] = 15.0;
+  Hx[2] = 15.0;
+  mu_assert("vals fail", cmp_double_array(prob->constraint_values, constrs, 5));
+  mu_assert("rows fail", cmp_int_array(prob->jacobian->p, Ap, 6));
+  mu_assert("cols fail", cmp_int_array(prob->jacobian->i, Ai, 9));
+  mu_assert("vals fail", cmp_double_array(prob->jacobian->x, Ax, 9));
+  mu_assert("hess rows fail", cmp_int_array(prob->lagrange_hessian->p, Hp, 4));
+  mu_assert("hess cols fail", cmp_int_array(prob->lagrange_hessian->i, Hi, 3));
+  mu_assert("hess vals fail", cmp_double_array(prob->lagrange_hessian->x, Hx, 3));
+
+  free_problem(prob);
+
+  return 0;
+}
+
 #endif /* TEST_PARAM_PROB_H */
diff --git a/tests/wsum_hess/affine/test_convolve.h b/tests/wsum_hess/affine/test_convolve.h
new file mode 100644
index 0000000..98dd7ab
--- /dev/null
+++ b/tests/wsum_hess/affine/test_convolve.h
@@ -0,0 +1,57 @@
+#include <stdio.h>
+
+#include "atoms/affine.h"
+#include "atoms/elementwise_restricted_dom.h"
+#include "expr.h"
+#include "minunit.h"
+#include "numerical_diff.h"
+#include "subexpr.h"
+#include "test_helpers.h"
+
+const char *test_wsum_hess_convolve(void)
+{
+  /* y = conv([1, 2, 3], x) is linear in x when the kernel is constant.
+   * Child x is a leaf variable (zero Hessian), so y's weighted Hessian
+   * should be zero for any weight vector w. */
+  double kernel[3] = {1.0, 2.0, 3.0};
+  expr *kernel_param = new_parameter(3, 1, PARAM_FIXED, 3, kernel);
+  expr *x = new_variable(3, 1, 0, 3);
+  expr *y = new_convolve(kernel_param, x);
+
+  double u[3] = {1.0, 2.0, 3.0};
+  double w[5] = {1.0, -1.0, 2.0, 3.0, -2.0};
+
+  y->forward(y, u);
+  jacobian_init(y);
+  wsum_hess_init(y);
+  y->eval_wsum_hess(y, w);
+
+  mu_assert("Convolve wsum_hess should be 3x3", y->wsum_hess->m == 3);
+  mu_assert("Convolve wsum_hess should be square", y->wsum_hess->n == 3);
+  mu_assert("Convolve wsum_hess should have zero nonzeros",
+            y->wsum_hess->nnz == 0);
+
+  free_expr(y);
+  return 0;
+}
+
+const char *test_wsum_hess_convolve_composite(void)
+{
+  /* y = conv([1, 2, 3], log(x)) is nonlinear in x, so backprop of weights
+   * through the Toeplitz should produce the same Hessian as a numerical
+   * second-derivative check. */
+  double kernel[3] = {1.0, 2.0, 3.0};
+  double x_vals[3] = {1.0, 2.0, 3.0};
+  double w[5] = {1.0, -1.0, 2.0, 3.0, -2.0};
+
+  expr *kernel_param = new_parameter(3, 1, PARAM_FIXED, 3, kernel);
+  expr *x = new_variable(3, 1, 0, 3);
+  expr *log_x = new_log(x);
+  expr *y = new_convolve(kernel_param, log_x);
+
+  mu_assert("check_wsum_hess failed",
+            check_wsum_hess(y, x_vals, w, NUMERICAL_DIFF_DEFAULT_H));
+
+  free_expr(y);
+  return 0;
+}
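
A standalone sketch (not part of the diff) of the Toeplitz identity the new atom relies on: y = T(a) @ x with T(a)[r, c] = a[r - c]. It mirrors the conv_matrix_fill_sparsity / conv_matrix_fill_values logic from mini_numpy.c, but the local csr struct and the fill_* helpers below are illustrative stand-ins (the p/i/x row-pointer layout is assumed to match the project's CSR_Matrix), not the project API. It checks the CSR product against forward()'s direct loop and T(a)^T @ w against the adjoint loop in eval_wsum_hess():

/* check_conv_toeplitz.c: standalone sketch with local stand-in types. */
#include <stdio.h>
#include <stdlib.h>

typedef struct { int m, n; int *p, *i; double *x; } csr;

/* Row r of T(a) covers columns max(0, r-m+1) .. min(r, n-1),
 * as in conv_matrix_fill_sparsity. */
static void fill_sparsity(csr *T, int m, int n)
{
  int nnz = 0;
  for (int r = 0; r < T->m; r++)
  {
    T->p[r] = nnz;
    int lo = r - m + 1 > 0 ? r - m + 1 : 0;
    int hi = r < n - 1 ? r : n - 1;
    for (int c = lo; c <= hi; c++)
      T->i[nnz++] = c;
  }
  T->p[T->m] = nnz;
}

/* T[r, c] = a[r - c]; valid because 0 <= r - c < m on the pattern. */
static void fill_values(csr *T, const double *a)
{
  for (int r = 0; r < T->m; r++)
    for (int k = T->p[r]; k < T->p[r + 1]; k++)
      T->x[k] = a[r - T->i[k]];
}

int main(void)
{
  enum { m = 3, n = 3, rows = m + n - 1 };
  double a[m] = {1, 2, 3}, x[n] = {1, 2, 3};
  double w[rows] = {1, -1, 2, 3, -2};

  csr T = {rows, n,
           malloc((rows + 1) * sizeof(int)),
           malloc(m * n * sizeof(int)),
           malloc(m * n * sizeof(double))};
  fill_sparsity(&T, m, n);
  fill_values(&T, a);

  /* y = T(a) @ x via the CSR pattern vs. the direct loop in forward() */
  double y_csr[rows] = {0}, y_dir[rows] = {0};
  for (int r = 0; r < rows; r++)
    for (int k = T.p[r]; k < T.p[r + 1]; k++)
      y_csr[r] += T.x[k] * x[T.i[k]];
  for (int j = 0; j < n; j++)
    for (int i = 0; i < m; i++)
      y_dir[i + j] += a[i] * x[j];

  /* w' = T(a)^T @ w vs. the direct loop in eval_wsum_hess() */
  double wp_csr[n] = {0}, wp_dir[n];
  for (int r = 0; r < rows; r++)
    for (int k = T.p[r]; k < T.p[r + 1]; k++)
      wp_csr[T.i[k]] += T.x[k] * w[r];
  for (int j = 0; j < n; j++)
  {
    double s = 0;
    for (int i = 0; i < m; i++)
      s += a[i] * w[i + j];
    wp_dir[j] = s;
  }

  for (int r = 0; r < rows; r++)
    printf("y[%d]  = %g / %g\n", r, y_csr[r], y_dir[r]);
  for (int j = 0; j < n; j++)
    printf("w'[%d] = %g / %g\n", j, wp_csr[j], wp_dir[j]);

  free(T.p); free(T.i); free(T.x);
  return 0;
}

Both pairs should agree: y = [1, 4, 10, 12, 9], the same values the forward tests assert, and w' = [5, 12, 2], matching eval_wsum_hess's w'[j] = sum_i a[i] * w[i + j].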