Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
4813082
Matrix multiplication does not change MatrixCSR
chrisrichardson Apr 16, 2026
0b0b958
Add transpose implementation
chrisrichardson Apr 16, 2026
7c06575
fix up test a bit
chrisrichardson Apr 16, 2026
3fdebef
Update tests
chrisrichardson Apr 17, 2026
1a541ad
Use non-blocking MPI for y update
chrisrichardson Apr 17, 2026
109843c
revert and update test
chrisrichardson Apr 17, 2026
f822b92
Add matmul implementation
chrisrichardson Apr 17, 2026
594c2c0
Update documentation and simplify fetch
chrisrichardson Apr 17, 2026
0eca151
bug fix
chrisrichardson Apr 17, 2026
3ad0e15
merge
chrisrichardson Apr 17, 2026
254d561
Update header
chrisrichardson Apr 17, 2026
d3d7600
update copyright
chrisrichardson Apr 17, 2026
463f9e6
Factor out mat_gather into conftest.py
chrisrichardson Apr 17, 2026
f905bb6
docs
chrisrichardson Apr 17, 2026
5bb2cb2
wrapping
chrisrichardson Apr 17, 2026
82bc93a
revert for this PR
chrisrichardson Apr 17, 2026
6f18d46
clang-format
chrisrichardson Apr 17, 2026
17b342b
And python wrappers
chrisrichardson Apr 17, 2026
fcf3e3a
documentation
chrisrichardson Apr 17, 2026
8aead85
clang-format
chrisrichardson Apr 17, 2026
8e67100
not sure why superlu_dist docs are causing problem...
chrisrichardson Apr 18, 2026
ba1d0a3
int type
chrisrichardson Apr 19, 2026
37f66ef
Add block size
chrisrichardson Apr 20, 2026
89f008f
Merge branch 'main' into chris/matvec-transpose
chrisrichardson Apr 20, 2026
703c6eb
clang-format
chrisrichardson Apr 21, 2026
8334d43
Merge branch 'main' into chris/matvec-transpose
chrisrichardson Apr 21, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cpp/dolfinx/la/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ set(HEADERS_la
${CMAKE_CURRENT_SOURCE_DIR}/dolfinx_la.h
${CMAKE_CURRENT_SOURCE_DIR}/MatrixCSR.h
${CMAKE_CURRENT_SOURCE_DIR}/matrix_csr_impl.h
${CMAKE_CURRENT_SOURCE_DIR}/matmul.h
${CMAKE_CURRENT_SOURCE_DIR}/SparsityPattern.h
${CMAKE_CURRENT_SOURCE_DIR}/superlu_dist.h
${CMAKE_CURRENT_SOURCE_DIR}/Vector.h
Expand Down
73 changes: 73 additions & 0 deletions cpp/dolfinx/la/MatrixCSR.h
Original file line number Diff line number Diff line change
Expand Up @@ -491,6 +491,40 @@ class MatrixCSR
/// @param[in,out] y Vector to accumulate the result into.
void mult(Vector<value_type>& x, Vector<value_type>& y) const;

/// @brief Compute the product `y += A^T x`.
///
/// Performs the distributed sparse matrix–vector product with the
/// *transpose* of the matrix. The computation is split into two phases:
///
/// 1. **Off-diagonal phase** — contributions from the off-diagonal block
///    (ghost columns of A, i.e. columns owned by remote ranks) are
///    accumulated into the ghost region of `y`. The ghost entries of
///    `y` are zeroed before this phase so that stale values do not
///    pollute the result. A reverse scatter (`scatter_rev`) then
///    reduces those ghost contributions back to the owning ranks.
///
/// 2. **Diagonal phase** — contributions from the diagonal block
///    (locally owned columns of A) are accumulated into the owned
///    entries of `y`.
///
/// **Layout requirements:**
/// - `x` must share the same `IndexMap` as the matrix *rows*,
///   `A.index_map(0)`. All ghost entries of `x` must be up-to-date
///   before calling (the implementation reads `x.array()` directly
///   without a forward scatter).
/// - `y` must share the same *owned* indices as the matrix *columns*,
///   `A.index_map(1)`. Only owned entries of `y` are meaningful after
///   the call; ghost entries are used as scratch and left in an
///   unspecified state.
///
/// @note `y` is accumulated *into* (not overwritten) on the owned
/// entries. Zero `y` before the first call if a fresh result is
/// required.
///
/// @note(review) `x` is taken by non-const reference although the
/// implementation only reads `x.array()` — confirm whether it can be
/// `const` (matching would also require changing `mult`).
///
/// @param[in] x Vector to apply `A^T` to; must cover the row space of A.
/// @param[in,out] y Vector accumulated into; covers the column space of A.
void multT(Vector<value_type>& x, Vector<value_type>& y) const;

/// @brief Get MPI communicator that matrix is defined on.
/// @return The matrix's MPI communicator.
MPI_Comm comm() const
{
  return _comm.comm();
}

Expand Down Expand Up @@ -886,4 +920,43 @@ void MatrixCSR<Scalar, V, W, X>::mult(la::Vector<Scalar>& x,
}
}

/// @brief Out-of-line implementation of MatrixCSR::multT.
/// @see MatrixCSR::multT for the full specification.
template <typename Scalar, typename V, typename W, typename X>
void MatrixCSR<Scalar, V, W, X>::multT(la::Vector<Scalar>& x,
                                       la::Vector<Scalar>& y) const
{
  const std::int32_t nrowslocal = num_owned_rows();

  // Views over the owned part of the CSR structure.
  // NOTE(review): these spans assume the row-pointer/offset containers
  // hold std::int64_t — confirm against the W template parameter.
  std::span<const std::int64_t> Arow_ptr(row_ptr().data(), nrowslocal + 1);
  const std::int64_t nnzlocal = Arow_ptr[nrowslocal];
  std::span<const std::int32_t> Acols(cols().data(), nnzlocal);
  std::span<const std::int64_t> Aoff_diag_offset(off_diag_offset().data(),
                                                 nrowslocal);
  std::span<const Scalar> Avalues(values().data(), nnzlocal);

  std::span<const Scalar> _x = x.array();
  std::span<Scalar> _y = y.array();

  // Per-row [begin, end) bounds; for row i the diagonal-block entries
  // occupy [Arow_begin[i], Aoff_diag_offset[i]) and the off-diagonal
  // entries occupy [Aoff_diag_offset[i], Arow_end[i]).
  std::span<const std::int64_t> Arow_begin(Arow_ptr.data(), nrowslocal);
  std::span<const std::int64_t> Arow_end(Arow_ptr.data() + 1, nrowslocal);

  // Phase 1: off-diagonal block. Zero the ghost region of y so stale
  // values are not accumulated, compute the ghost-column contributions,
  // then reduce them back onto the owning ranks.
  // Use Scalar{0} (not the double literal 0.0) so the fill is correct
  // for any scalar type, including ones without a conversion from
  // double.
  const std::int32_t ncolslocal = index_map(1)->size_local();
  std::fill(std::next(_y.begin(), ncolslocal), _y.end(), Scalar{0});
  if (_bs[1] == 1)
    impl::spmvT<Scalar, 1>(Avalues, Aoff_diag_offset, Arow_end, Acols, _x, _y,
                           _bs[0], 1);
  else
    impl::spmvT<Scalar, -1>(Avalues, Aoff_diag_offset, Arow_end, Acols, _x, _y,
                            _bs[0], _bs[1]);

  y.scatter_rev(std::plus<Scalar>{});

  // Phase 2: diagonal block — accumulate contributions from locally
  // owned columns into the owned entries of y.
  if (_bs[1] == 1)
    impl::spmvT<Scalar, 1>(Avalues, Arow_begin, Aoff_diag_offset, Acols, _x, _y,
                           _bs[0], 1);
  else
    impl::spmvT<Scalar, -1>(Avalues, Arow_begin, Aoff_diag_offset, Acols, _x,
                            _y, _bs[0], _bs[1]);
}

} // namespace dolfinx::la
Loading
Loading