Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 22 additions & 6 deletions include/atoms/affine.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,19 +42,31 @@ expr *new_diag_mat(expr *child);
expr *new_upper_tri(expr *child);
expr *new_transpose(expr *child);

/* Left matrix multiplication: A @ f(x) where A is a constant or parameter
* sparse matrix. param_node is NULL for fixed constants. */
/* Left matrix multiplication: A @ f(x) where A is a constant sparse matrix.
param_node is NULL for fixed constants. We currently do not support sparse
parameters, so param_node should always be null. */
expr *new_left_matmul(expr *param_node, expr *u, const CSR_Matrix *A);

/* Left matrix multiplication: A @ f(x) where A is a constant or parameter
* dense matrix (row-major, m x n). Uses CBLAS for efficient computation. */
/* Left matrix multiplication: A @ f(x) where A is a constant dense matrix
(in row-major, m x n, with values given by 'data') or a parameter
representing a dense m x n matrix in row-major.

The 'data' pointer only represents the values of the dense matrix if
param_node is null. If param_node is not null, data must be null. */
expr *new_left_matmul_dense(expr *param_node, expr *u, int m, int n,
const double *data);

/* Right matrix multiplication: f(x) @ A where A is a constant or parameter
* matrix. */
/* Right matrix multiplication: f(x) @ A where A is a constant sparse matrix.
We currently do not support sparse parameters, so param_node should always be
null. */
expr *new_right_matmul(expr *param_node, expr *u, const CSR_Matrix *A);

/* Right matrix multiplication: f(x) @ A where A is a constant dense matrix
(in row-major, m x n, with values given by 'data') or a parameter
representing a dense m x n matrix in row-major.

The 'data' pointer only represents the values of the dense matrix if
param_node is null. If param_node is not null, data must be null. */
expr *new_right_matmul_dense(expr *param_node, expr *u, int m, int n,
const double *data);

Expand All @@ -65,4 +77,8 @@ expr *new_scalar_mult(expr *param_node, expr *child);
* param_node */
expr *new_vector_mult(expr *param_node, expr *child);

/* 1D full convolution: y = conv(param_node, child) where param_node is the
kernel and may either represent a constant or an updatable parameter */
expr *new_convolve(expr *param_node, expr *child);

#endif /* AFFINE_H */
15 changes: 15 additions & 0 deletions include/subexpr.h
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,21 @@ typedef struct vector_mult_expr
expr *param_source;
} vector_mult_expr;

/* 1D convolution: y = conv(a, child) where a is a length-m kernel held by
* param_source. Output has size (m + n - 1) where n is the child length.
* Forward and wsum_hess backprop are computed as direct loops; for Jacobian
* we materialize T(a) as a CSR once at jacobian_init and reuse the engine's
* block-left-mult machinery for composite children. */
typedef struct convolve_expr
{
expr base;
expr *param_source; /* length-m kernel */
int m; /* kernel length */
int n; /* input length */
CSR_Matrix *T; /* (m+n-1) x n convolution matrix */
CSC_Matrix *Jchild_CSC;
} convolve_expr;

/* Bivariate matrix multiplication: Z = f(u) @ g(u) where both children
* may be composite expressions. */
typedef struct matmul_expr
Expand Down
3 changes: 2 additions & 1 deletion include/utils/dense_matrix.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ typedef struct Dense_Matrix
double *work; /* scratch buffer, length n */
} Dense_Matrix;

/* Constructors */
/* Constructors. If data is NULL, the value buffer is allocated but left
uninitialized; otherwise m*n entries are copied from data. */
Matrix *new_dense_matrix(int m, int n, const double *data);

/* Transpose helper */
Expand Down
11 changes: 11 additions & 0 deletions include/utils/mini_numpy.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
#ifndef MINI_NUMPY_H
#define MINI_NUMPY_H

#include "utils/CSR_Matrix.h"

/* Example: a = [1, 2], len = 2, repeats = 3, result = [1, 1, 1, 2, 2, 2] */
void repeat(double *result, const double *a, int len, int repeats);

Expand All @@ -41,4 +43,13 @@ void Y_kron_I_vec(int m, int k, int n, const double *Y, const double *w, double
compute v = vec(X^T @ W). */
void I_kron_XT_vec(int m, int k, int n, const double *X, const double *w, double *v);

/* Fill T_csr's row pointers and column indices for the 1D full-convolution
Toeplitz matrix T(a), sized (m+n-1) x n with m*n nonzeros. Values (x) are
not written; call conv_matrix_fill_values to populate them. */
void conv_matrix_fill_sparsity(CSR_Matrix *T_csr, int m, int n);

/* Overwrite T_csr->x from kernel a, using the sparsity already written by
conv_matrix_fill_sparsity. T[r, col] = a[r - col]. */
void conv_matrix_fill_values(CSR_Matrix *T_csr, const double *a);

#endif /* MINI_NUMPY_H */
194 changes: 194 additions & 0 deletions src/atoms/affine/convolve.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
/*
* Copyright 2026 Daniel Cederberg and William Zhang
*
* This file is part of the SparseDiffEngine project.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "atoms/affine.h"
#include "subexpr.h"
#include "utils/CSR_Matrix.h"
#include "utils/linalg_sparse_matmuls.h"
#include "utils/mini_numpy.h"
#include "utils/tracked_alloc.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* 1D full convolution: y = conv(a, child), where a is a length-m kernel
* held by param_source and child is a length-n vector. Output length is
* m + n - 1. In matrix form, y = T(a) @ child, where T(a) is the
* (m+n-1) x n Toeplitz/convolution matrix with T(a)[k, j] = a[k-j] when
* 0 <= k-j < m. */

static void forward(expr *node, const double *u)
{
expr *child = node->left;
convolve_expr *cnode = (convolve_expr *) node;

if (cnode->base.needs_parameter_refresh)
{
cnode->param_source->forward(cnode->param_source, NULL);
/* refresh the convolution matrix values if it exists (necessary to check
for null in case someone calls forward before initializing the jacobian,
which might happen when we expose Python bindings to SparseDiffEngine) */
if (cnode->T != NULL)
{
conv_matrix_fill_values(cnode->T, cnode->param_source->value);
}
cnode->base.needs_parameter_refresh = false;
}

child->forward(child, u);

const double *a = cnode->param_source->value;
const double *x = child->value;
double *y = node->value;

memset(y, 0, node->size * sizeof(double));
for (int j = 0; j < cnode->n; j++)
{
for (int i = 0; i < cnode->m; i++)
{
y[i + j] += a[i] * x[j];
}
}
}

static void jacobian_init_impl(expr *node)
{
expr *child = node->left;
convolve_expr *cnode = (convolve_expr *) node;
int m = cnode->m;
int n = cnode->n;
const double *a = cnode->param_source->value;

jacobian_init(child);

/* Build convolution matrix of size (m+n-1) x n with m*n nonzeros */
cnode->T = new_csr_matrix(m + n - 1, n, m * n);
conv_matrix_fill_sparsity(cnode->T, m, n);
conv_matrix_fill_values(cnode->T, a);

/* J = T @ J_child */
cnode->Jchild_CSC = csr_to_csc_alloc(child->jacobian, node->work->iwork);
node->jacobian = csr_csc_matmul_alloc(cnode->T, cnode->Jchild_CSC);
}

static void eval_jacobian(expr *node)
{
expr *child = node->left;
convolve_expr *cnode = (convolve_expr *) node;

child->eval_jacobian(child);

/* J = T @ J_child */
csr_to_csc_fill_values(child->jacobian, cnode->Jchild_CSC, node->work->iwork);
csr_csc_matmul_fill_values(cnode->T, cnode->Jchild_CSC, node->jacobian);
}

static void wsum_hess_init_impl(expr *node)
{
expr *child = node->left;
convolve_expr *cnode = (convolve_expr *) node;

wsum_hess_init(child);
node->wsum_hess = new_csr_copy_sparsity(child->wsum_hess);
node->work->dwork = (double *) SP_MALLOC(cnode->n * sizeof(double));
}

static void eval_wsum_hess(expr *node, const double *w)
{
expr *child = node->left;
convolve_expr *cnode = (convolve_expr *) node;
const double *a = cnode->param_source->value;
double *w_prime = node->work->dwork;

/* w' = conv_matrix.T w, so w'[j] = sum_{i=0..m-1} a[i] * w[i + j]. */
for (int j = 0; j < cnode->n; j++)
{
double sum = 0.0;
for (int i = 0; i < cnode->m; i++)
{
sum += a[i] * w[i + j];
}
w_prime[j] = sum;
}

child->eval_wsum_hess(child, w_prime);
memcpy(node->wsum_hess->x, child->wsum_hess->x,
child->wsum_hess->nnz * sizeof(double));
}

static bool is_affine(const expr *node)
{
return node->left->is_affine(node->left);
}

static void free_type_data(expr *node)
{
convolve_expr *cnode = (convolve_expr *) node;
free_csr_matrix(cnode->T);
free_csc_matrix(cnode->Jchild_CSC);
free_expr(cnode->param_source);
}

expr *new_convolve(expr *param_node, expr *child)
{
/* Accept both (n, 1) column and (1, n) row shapes — numpy-style 1D
arrays reach us as (1, n) through the Python bindings, and the math
is shape-agnostic (flat buffers of size m + n - 1). Output shape
matches the input orientation. */
int m = param_node->size;
int n, d1, d2;
if (child->d2 == 1)
{
n = child->d1;
d1 = m + n - 1;
d2 = 1;
}
else if (child->d1 == 1)
{
n = child->d2;
d1 = 1;
d2 = m + n - 1;
}
else
{
fprintf(stderr, "Error in new_convolve: child must be a vector "
"(shape (n, 1) or (1, n))\n");
exit(1);
}

convolve_expr *cnode = (convolve_expr *) SP_CALLOC(1, sizeof(convolve_expr));
expr *node = &cnode->base;
init_expr(node, d1, d2, child->n_vars, forward, jacobian_init_impl,
eval_jacobian, is_affine, wsum_hess_init_impl, eval_wsum_hess,
free_type_data);
node->left = child;
expr_retain(child);

cnode->param_source = param_node;
expr_retain(param_node);
cnode->m = m;
cnode->n = n;

/* iwork is used for csr_to_csc of child's Jacobian (size n_vars). */
node->work->iwork = (int *) SP_MALLOC(node->n_vars * sizeof(int));

/* Ensure first forward() pulls current param values through any
broadcast/promote wrappers and reflects them in T (once T is built). */
cnode->base.needs_parameter_refresh = true;

return node;
}
26 changes: 23 additions & 3 deletions src/atoms/affine/left_matmul.c
Original file line number Diff line number Diff line change
Expand Up @@ -276,15 +276,35 @@ expr *new_left_matmul_dense(expr *param_node, expr *u, int m, int n,
lnode->csc_to_csr_work = (int *) SP_MALLOC(node->size * sizeof(int));
lnode->n_blocks = n_blocks;

lnode->A = new_dense_matrix(m, n, data);
lnode->AT = dense_matrix_trans((const Dense_Matrix *) lnode->A);

/* parameter support */
lnode->param_source = param_node;
if (param_node != NULL)
{
if (data != NULL)
{
fprintf(stderr, "Error in new_left_matmul_dense with ptr convention \n");
exit(1);
}

expr_retain(param_node);
lnode->refresh_param_values = refresh_dense_left;

/* A and AT buffers are filled by refresh_dense_left from the parameter. */
lnode->A = new_dense_matrix(m, n, NULL);
lnode->AT = new_dense_matrix(n, m, NULL);
node->needs_parameter_refresh = true;
}
/* constant matrix case */
else
{
if (data == NULL)
{
fprintf(stderr, "Error in new_left_matmul_dense with ptr convention \n");
exit(1);
}

lnode->A = new_dense_matrix(m, n, data);
lnode->AT = dense_matrix_trans((const Dense_Matrix *) lnode->A);
}

return node;
Expand Down
30 changes: 19 additions & 11 deletions src/atoms/affine/right_matmul.c
Original file line number Diff line number Diff line change
Expand Up @@ -82,24 +82,32 @@ expr *new_right_matmul(expr *param_node, expr *u, const CSR_Matrix *A)
expr *new_right_matmul_dense(expr *param_node, expr *u, int m, int n,
const double *data)
{
/* We express: u @ A = (A^T @ u^T)^T. A is m x n, so A^T is n x m. */
double *AT = (double *) SP_MALLOC(n * m * sizeof(double));
A_transpose(AT, data, m, n);
if ((param_node != NULL && data != NULL) || (param_node == NULL && data == NULL))
{
fprintf(stderr, "Error in new_right_matmul_dense with ptr convention \n");
exit(1);
}

/* We express: u @ A = (A^T @ u^T)^T. A is m x n, so A^T is n x m. */
expr *u_transpose = new_transpose(u);
expr *left_matmul_node = new_left_matmul_dense(NULL, u_transpose, n, m, AT);
expr *left_matmul_node;

/* If parameterized, attach param_source and custom refresh */
if (param_node != NULL)
{
left_matmul_node =
new_left_matmul_dense(param_node, u_transpose, n, m, NULL);
left_matmul_expr *lnode = (left_matmul_expr *) left_matmul_node;
lnode->param_source = param_node;
expr_retain(param_node);

/* swap the refresh function */
lnode->refresh_param_values = refresh_dense_right;
}
else
{
double *AT = (double *) SP_MALLOC(n * m * sizeof(double));
A_transpose(AT, data, m, n);
left_matmul_node = new_left_matmul_dense(NULL, u_transpose, n, m, AT);
free(AT);
}

expr *node = new_transpose(left_matmul_node);

free(AT);
return node;
return new_transpose(left_matmul_node);
}
5 changes: 4 additions & 1 deletion src/utils/dense_matrix.c
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,10 @@ Matrix *new_dense_matrix(int m, int n, const double *data)
dm->base.update_values = dense_update_values;
dm->base.free_fn = dense_free;
dm->x = (double *) SP_MALLOC(m * n * sizeof(double));
memcpy(dm->x, data, m * n * sizeof(double));
if (data != NULL)
{
memcpy(dm->x, data, m * n * sizeof(double));
}
dm->work = (double *) SP_MALLOC(n * sizeof(double));
return &dm->base;
}
Expand Down
Loading
Loading