From 43ee0acef248d8db99dc49edebf0c919c3d95ccf Mon Sep 17 00:00:00 2001 From: Michel Pelletier Date: Thu, 11 Jun 2026 18:24:27 -0700 Subject: [PATCH 1/2] Add LAGraph_Matrix_Sum utility LAGraph_Matrix_Sum combines an array of GrB_Matrix objects into a single matrix using a binary operator to resolve duplicate entries. It computes the total number of entries across all inputs, allocates one shared tuple buffer (I, J, X), extracts the tuples of each input matrix into the buffer, and calls GrB_Matrix_build with the dup operator to combine duplicate (i,j) coordinates. With dup = GrB_PLUS_FP64 (for example) this computes the element-wise sum of all the matrices. All input matrices must have identical dimensions and the same built-in type; user-defined types return GrB_NOT_IMPLEMENTED. Includes a test (src/test/test_Matrix_Sum.c) covering correctness, all built-in type branches, error handling, and a brutal malloc-failure variant. --- Config/LAGraph.h.in | 41 +++++ include/LAGraph.h | 41 +++++ src/test/test_Matrix_Sum.c | 274 +++++++++++++++++++++++++++++++ src/utility/LAGraph_Matrix_Sum.c | 170 +++++++++++++++++++ 4 files changed, 526 insertions(+) create mode 100644 src/test/test_Matrix_Sum.c create mode 100644 src/utility/LAGraph_Matrix_Sum.c diff --git a/Config/LAGraph.h.in b/Config/LAGraph.h.in index a46c06c8eb..77aed33386 100644 --- a/Config/LAGraph.h.in +++ b/Config/LAGraph.h.in @@ -1572,6 +1572,47 @@ int LAGraph_Matrix_Structure char *msg ) ; +//------------------------------------------------------------------------------ +// LAGraph_Matrix_Sum: sum an array of matrices with a binary operator +//------------------------------------------------------------------------------ + +/** LAGraph_Matrix_Sum: combines an array of matrices into a single matrix C. + * The tuples of all input matrices are concatenated into a single buffer and + * passed to GrB_Matrix_build, using the binary operator dup to combine any + * duplicate (i,j) entries. With dup = GrB_PLUS_FP64 (for example) this + * computes the element-wise sum of all the matrices. + * + * All input matrices must have identical dimensions and identical built-in + * type; C is created with that same type and dimensions. + * + * @param[out] C the resulting matrix. + * @param[in] Matrices array of nmatrices input matrices. + * @param[in] nmatrices number of matrices in Matrices (must be >= 1). + * @param[in] dup binary operator to combine duplicate (i,j) entries; + * if NULL, duplicates are handled per GrB_Matrix_build. + * @param[in,out] msg any error messages. + * + * @retval GrB_SUCCESS if successful. + * @retval GrB_NULL_POINTER if C, Matrices, or any Matrices[k] is NULL. + * @retval GrB_INVALID_VALUE if nmatrices is zero. + * @retval GrB_DIMENSION_MISMATCH if the inputs do not all share dimensions. + * @retval GrB_DOMAIN_MISMATCH if the inputs do not all share a type. + * @retval GrB_NOT_IMPLEMENTED if the matrix type is user-defined. + * @returns any GraphBLAS errors that may have been encountered. + */ + +LAGRAPH_PUBLIC +int LAGraph_Matrix_Sum +( + // output: + GrB_Matrix *C, // result = combination of all input matrices + // input: + GrB_Matrix *Matrices, // array of nmatrices input matrices + GrB_Index nmatrices, // number of matrices in the array (must be >= 1) + GrB_BinaryOp dup, // operator to combine duplicate (i,j) entries + char *msg +) ; + //------------------------------------------------------------------------------ // LAGraph_Vector_Structure: return the structure of a vector //------------------------------------------------------------------------------ diff --git a/include/LAGraph.h b/include/LAGraph.h index ea88c7c7dc..073f2bdf6c 100644 --- a/include/LAGraph.h +++ b/include/LAGraph.h @@ -1572,6 +1572,47 @@ int LAGraph_Matrix_Structure char *msg ) ; +//------------------------------------------------------------------------------ +// LAGraph_Matrix_Sum: sum an array of matrices with a binary operator +//------------------------------------------------------------------------------ + +/** LAGraph_Matrix_Sum: combines an array of matrices into a single matrix C. + * The tuples of all input matrices are concatenated into a single buffer and + * passed to GrB_Matrix_build, using the binary operator dup to combine any + * duplicate (i,j) entries. With dup = GrB_PLUS_FP64 (for example) this + * computes the element-wise sum of all the matrices. + * + * All input matrices must have identical dimensions and identical built-in + * type; C is created with that same type and dimensions. + * + * @param[out] C the resulting matrix. + * @param[in] Matrices array of nmatrices input matrices. + * @param[in] nmatrices number of matrices in Matrices (must be >= 1). + * @param[in] dup binary operator to combine duplicate (i,j) entries; + * if NULL, duplicates are handled per GrB_Matrix_build. + * @param[in,out] msg any error messages. + * + * @retval GrB_SUCCESS if successful. + * @retval GrB_NULL_POINTER if C, Matrices, or any Matrices[k] is NULL. + * @retval GrB_INVALID_VALUE if nmatrices is zero. + * @retval GrB_DIMENSION_MISMATCH if the inputs do not all share dimensions. + * @retval GrB_DOMAIN_MISMATCH if the inputs do not all share a type. + * @retval GrB_NOT_IMPLEMENTED if the matrix type is user-defined. + * @returns any GraphBLAS errors that may have been encountered. + */ + +LAGRAPH_PUBLIC +int LAGraph_Matrix_Sum +( + // output: + GrB_Matrix *C, // result = combination of all input matrices + // input: + GrB_Matrix *Matrices, // array of nmatrices input matrices + GrB_Index nmatrices, // number of matrices in the array (must be >= 1) + GrB_BinaryOp dup, // operator to combine duplicate (i,j) entries + char *msg +) ; + //------------------------------------------------------------------------------ // LAGraph_Vector_Structure: return the structure of a vector //------------------------------------------------------------------------------ diff --git a/src/test/test_Matrix_Sum.c b/src/test/test_Matrix_Sum.c new file mode 100644 index 0000000000..a46e78eb98 --- /dev/null +++ b/src/test/test_Matrix_Sum.c @@ -0,0 +1,274 @@ +//------------------------------------------------------------------------------ +// LAGraph/src/test/test_Matrix_Sum.c: test LAGraph_Matrix_Sum +//------------------------------------------------------------------------------ + +// LAGraph, (c) 2019-2025 by The LAGraph Contributors, All Rights Reserved. +// SPDX-License-Identifier: BSD-2-Clause +// +// For additional details (including references to third party source code and +// other files) see the LICENSE file or contact permission@sei.cmu.edu. See +// Contributors.txt for a full list of contributors. Created, in part, with +// funding and support from the U.S. Government (see Acknowledgments.txt file). +// DM22-0790 + +// Contributed by Michel Pelletier. + +//------------------------------------------------------------------------------ + +#include "LAGraph_test.h" +#include "LG_internal.h" + +//------------------------------------------------------------------------------ +// global variables +//------------------------------------------------------------------------------ + +char msg [LAGRAPH_MSG_LEN] ; +GrB_Matrix A = NULL, B = NULL, C = NULL, Expected = NULL ; + +//------------------------------------------------------------------------------ +// setup and teardown +//------------------------------------------------------------------------------ + +void setup (void) +{ + OK (LAGraph_Init (msg)) ; +} + +void teardown (void) +{ + OK (LAGraph_Finalize (msg)) ; +} + +//------------------------------------------------------------------------------ +// make_A, make_B: construct two 4x4 FP64 matrices with overlapping and +// distinct (i,j) coordinates +//------------------------------------------------------------------------------ + +static void make_AB (void) +{ + // A has entries at (0,0), (1,1), (2,2), (0,3) + GrB_Index Ai [ ] = { 0, 1, 2, 0 } ; + GrB_Index Aj [ ] = { 0, 1, 2, 3 } ; + double Ax [ ] = { 1, 2, 3, 4 } ; + OK (GrB_Matrix_new (&A, GrB_FP64, 4, 4)) ; + OK (GrB_Matrix_build_FP64 (A, Ai, Aj, Ax, 4, NULL)) ; + + // B has entries at (0,0), (1,1), (3,3), (2,0) + // shares (0,0) and (1,1) with A, distinct elsewhere + GrB_Index Bi [ ] = { 0, 1, 3, 2 } ; + GrB_Index Bj [ ] = { 0, 1, 3, 0 } ; + double Bx [ ] = { 10, 20, 30, 40 } ; + OK (GrB_Matrix_new (&B, GrB_FP64, 4, 4)) ; + OK (GrB_Matrix_build_FP64 (B, Bi, Bj, Bx, 4, NULL)) ; +} + +//------------------------------------------------------------------------------ +// test_Matrix_Sum: basic correctness +//------------------------------------------------------------------------------ + +void test_Matrix_Sum (void) +{ + setup ( ) ; + + make_AB ( ) ; + + // Expected = A + B (element-wise add, set union) + OK (GrB_Matrix_new (&Expected, GrB_FP64, 4, 4)) ; + OK (GrB_eWiseAdd (Expected, NULL, NULL, GrB_PLUS_FP64, A, B, NULL)) ; + + // C = sum of {A, B} using GrB_PLUS_FP64 for duplicates + GrB_Matrix Mats [2] = { A, B } ; + OK (LAGraph_Matrix_Sum (&C, Mats, 2, GrB_PLUS_FP64, msg)) ; + + bool ok ; + OK (LAGraph_Matrix_IsEqual (&ok, C, Expected, msg)) ; + TEST_CHECK (ok) ; + TEST_MSG ("sum of {A,B} did not equal A+B") ; + OK (GrB_free (&C)) ; + + // single-matrix array: result must be a copy of A + GrB_Matrix One [1] = { A } ; + OK (LAGraph_Matrix_Sum (&C, One, 1, GrB_PLUS_FP64, msg)) ; + OK (LAGraph_Matrix_IsEqual (&ok, C, A, msg)) ; + TEST_CHECK (ok) ; + TEST_MSG ("sum of {A} did not equal A") ; + OK (GrB_free (&C)) ; + + // an empty matrix in the array contributes nothing + GrB_Matrix Empty = NULL ; + OK (GrB_Matrix_new (&Empty, GrB_FP64, 4, 4)) ; + GrB_Matrix WithEmpty [3] = { A, Empty, B } ; + OK (LAGraph_Matrix_Sum (&C, WithEmpty, 3, GrB_PLUS_FP64, msg)) ; + OK (LAGraph_Matrix_IsEqual (&ok, C, Expected, msg)) ; + TEST_CHECK (ok) ; + TEST_MSG ("sum of {A,Empty,B} did not equal A+B") ; + OK (GrB_free (&Empty)) ; + OK (GrB_free (&C)) ; + + OK (GrB_free (&A)) ; + OK (GrB_free (&B)) ; + OK (GrB_free (&Expected)) ; + + teardown ( ) ; +} + +//------------------------------------------------------------------------------ +// test_Matrix_Sum_types: exercise every built-in type branch +//------------------------------------------------------------------------------ + +void test_Matrix_Sum_types (void) +{ + setup ( ) ; + + // (type, GrB_PLUS operator) pairs, one per built-in type branch + GrB_Type types [ ] = { GrB_BOOL, GrB_INT8, GrB_INT16, GrB_INT32, + GrB_INT64, GrB_UINT8, GrB_UINT16, GrB_UINT32, GrB_UINT64, + GrB_FP32, GrB_FP64 } ; + GrB_BinaryOp ops [ ] = { GrB_LOR, GrB_PLUS_INT8, GrB_PLUS_INT16, + GrB_PLUS_INT32, GrB_PLUS_INT64, GrB_PLUS_UINT8, GrB_PLUS_UINT16, + GrB_PLUS_UINT32, GrB_PLUS_UINT64, GrB_PLUS_FP32, GrB_PLUS_FP64 } ; + int ntypes = sizeof (types) / sizeof (types [0]) ; + + GrB_Index Ai [ ] = { 0, 1, 2 } ; + GrB_Index Aj [ ] = { 0, 1, 0 } ; + GrB_Index Bi [ ] = { 0, 1, 0 } ; + GrB_Index Bj [ ] = { 0, 1, 2 } ; + + for (int t = 0 ; t < ntypes ; t++) + { + OK (GrB_Matrix_new (&A, types [t], 3, 3)) ; + OK (GrB_Matrix_new (&B, types [t], 3, 3)) ; + // build with INT32 values; GraphBLAS typecasts to the matrix type + int32_t Ax [ ] = { 1, 1, 1 } ; + int32_t Bx [ ] = { 1, 1, 1 } ; + OK (GrB_Matrix_build_INT32 (A, Ai, Aj, Ax, 3, NULL)) ; + OK (GrB_Matrix_build_INT32 (B, Bi, Bj, Bx, 3, NULL)) ; + + OK (GrB_Matrix_new (&Expected, types [t], 3, 3)) ; + OK (GrB_eWiseAdd (Expected, NULL, NULL, ops [t], A, B, NULL)) ; + + GrB_Matrix Mats [2] = { A, B } ; + OK (LAGraph_Matrix_Sum (&C, Mats, 2, ops [t], msg)) ; + + bool ok ; + OK (LAGraph_Matrix_IsEqual (&ok, C, Expected, msg)) ; + TEST_CHECK (ok) ; + TEST_MSG ("type index %d failed", t) ; + + OK (GrB_free (&A)) ; + OK (GrB_free (&B)) ; + OK (GrB_free (&C)) ; + OK (GrB_free (&Expected)) ; + } + + teardown ( ) ; +} + +//------------------------------------------------------------------------------ +// test_Matrix_Sum_brutal +//------------------------------------------------------------------------------ + +#if LG_BRUTAL_TESTS +void test_Matrix_Sum_brutal (void) +{ + OK (LG_brutal_setup (msg)) ; + + make_AB ( ) ; + OK (GrB_Matrix_new (&Expected, GrB_FP64, 4, 4)) ; + OK (GrB_eWiseAdd (Expected, NULL, NULL, GrB_PLUS_FP64, A, B, NULL)) ; + + GrB_Matrix Mats [2] = { A, B } ; + LG_BRUTAL (LAGraph_Matrix_Sum (&C, Mats, 2, GrB_PLUS_FP64, msg)) ; + + bool ok ; + OK (LAGraph_Matrix_IsEqual (&ok, C, Expected, msg)) ; + TEST_CHECK (ok) ; + + OK (GrB_free (&A)) ; + OK (GrB_free (&B)) ; + OK (GrB_free (&C)) ; + OK (GrB_free (&Expected)) ; + + OK (LG_brutal_teardown (msg)) ; +} +#endif + +//------------------------------------------------------------------------------ +// test_Matrix_Sum_failures: test error handling +//------------------------------------------------------------------------------ + +void test_Matrix_Sum_failures (void) +{ + setup ( ) ; + + make_AB ( ) ; + GrB_Matrix Mats [2] = { A, B } ; + int result ; + + // NULL output + result = LAGraph_Matrix_Sum (NULL, Mats, 2, GrB_PLUS_FP64, msg) ; + TEST_CHECK (result == GrB_NULL_POINTER) ; + + // NULL Matrices array + C = NULL ; + result = LAGraph_Matrix_Sum (&C, NULL, 2, GrB_PLUS_FP64, msg) ; + TEST_CHECK (result == GrB_NULL_POINTER) ; + TEST_CHECK (C == NULL) ; + + // nmatrices == 0 + result = LAGraph_Matrix_Sum (&C, Mats, 0, GrB_PLUS_FP64, msg) ; + TEST_CHECK (result == GrB_INVALID_VALUE) ; + + // NULL entry in the array + GrB_Matrix WithNull [2] = { A, NULL } ; + result = LAGraph_Matrix_Sum (&C, WithNull, 2, GrB_PLUS_FP64, msg) ; + TEST_CHECK (result == GrB_NULL_POINTER) ; + + // dimension mismatch + GrB_Matrix D = NULL ; + OK (GrB_Matrix_new (&D, GrB_FP64, 5, 5)) ; + GrB_Matrix BadDim [2] = { A, D } ; + result = LAGraph_Matrix_Sum (&C, BadDim, 2, GrB_PLUS_FP64, msg) ; + TEST_CHECK (result == GrB_DIMENSION_MISMATCH) ; + OK (GrB_free (&D)) ; + + // type mismatch + GrB_Matrix E = NULL ; + OK (GrB_Matrix_new (&E, GrB_INT32, 4, 4)) ; + GrB_Matrix BadType [2] = { A, E } ; + result = LAGraph_Matrix_Sum (&C, BadType, 2, GrB_PLUS_FP64, msg) ; + TEST_CHECK (result == GrB_DOMAIN_MISMATCH) ; + OK (GrB_free (&E)) ; + + // user-defined type not supported + typedef struct { double x, y ; } udt_t ; + GrB_Type UDT = NULL ; + OK (GrB_Type_new (&UDT, sizeof (udt_t))) ; + GrB_Matrix U = NULL ; + OK (GrB_Matrix_new (&U, UDT, 4, 4)) ; + GrB_Matrix Udt [1] = { U } ; + result = LAGraph_Matrix_Sum (&C, Udt, 1, NULL, msg) ; + TEST_CHECK (result == GrB_NOT_IMPLEMENTED) ; + OK (GrB_free (&U)) ; + OK (GrB_free (&UDT)) ; + + OK (GrB_free (&A)) ; + OK (GrB_free (&B)) ; + + teardown ( ) ; +} + +//------------------------------------------------------------------------------ +// TEST_LIST: the list of tasks for this entire test +//------------------------------------------------------------------------------ + +TEST_LIST = +{ + { "Matrix_Sum", test_Matrix_Sum }, + { "Matrix_Sum_types", test_Matrix_Sum_types }, + { "Matrix_Sum_failures", test_Matrix_Sum_failures }, + #if LG_BRUTAL_TESTS + { "Matrix_Sum_brutal", test_Matrix_Sum_brutal }, + #endif + { NULL, NULL } +} ; diff --git a/src/utility/LAGraph_Matrix_Sum.c b/src/utility/LAGraph_Matrix_Sum.c new file mode 100644 index 0000000000..7532f8fb72 --- /dev/null +++ b/src/utility/LAGraph_Matrix_Sum.c @@ -0,0 +1,170 @@ +//------------------------------------------------------------------------------ +// LAGraph_Matrix_Sum: sum an array of matrices with a binary operator +//------------------------------------------------------------------------------ + +// LAGraph, (c) 2019-2025 by The LAGraph Contributors, All Rights Reserved. +// SPDX-License-Identifier: BSD-2-Clause +// +// For additional details (including references to third party source code and +// other files) see the LICENSE file or contact permission@sei.cmu.edu. See +// Contributors.txt for a full list of contributors. Created, in part, with +// funding and support from the U.S. Government (see Acknowledgments.txt file). +// DM22-0790 + +// Contributed by Michel Pelletier. + +//------------------------------------------------------------------------------ + +// LAGraph_Matrix_Sum combines an array of matrices into a single matrix C. It +// computes the total number of entries across all inputs, allocates a single +// tuple buffer (I, J, X) large enough to hold every entry, extracts the tuples +// of each input matrix into that buffer, and then calls GrB_Matrix_build with +// the binary operator dup to combine any duplicate (i,j) entries. With dup = +// GrB_PLUS_FP64 (for example) this computes the element-wise sum of all input +// matrices. + +// All input matrices must have identical dimensions and identical built-in +// type; C is created with that same type and dimensions. + +#define LG_FREE_WORK \ +{ \ + LAGraph_Free ((void **) &I, NULL) ; \ + LAGraph_Free ((void **) &J, NULL) ; \ + LAGraph_Free ((void **) &X, NULL) ; \ +} + +#define LG_FREE_ALL \ +{ \ + LG_FREE_WORK ; \ + GrB_free (C) ; \ +} + +#include "LG_internal.h" + +int LAGraph_Matrix_Sum +( + // output: + GrB_Matrix *C, // result = combination of all input matrices + // input: + GrB_Matrix *Matrices, // array of nmatrices input matrices + GrB_Index nmatrices, // number of matrices in the array (must be >= 1) + GrB_BinaryOp dup, // operator to combine duplicate (i,j) entries + char *msg +) +{ + + //-------------------------------------------------------------------------- + // check inputs + //-------------------------------------------------------------------------- + + LG_CLEAR_MSG ; + GrB_Index *I = NULL, *J = NULL ; + void *X = NULL ; + LG_ASSERT_MSG (C != NULL, GrB_NULL_POINTER, "&C != NULL") ; + LG_ASSERT (Matrices != NULL, GrB_NULL_POINTER) ; + (*C) = NULL ; + LG_ASSERT_MSG (nmatrices >= 1, GrB_INVALID_VALUE, + "nmatrices must be >= 1") ; + LG_ASSERT (Matrices [0] != NULL, GrB_NULL_POINTER) ; + + //-------------------------------------------------------------------------- + // determine the reference dimensions and type from the first matrix + //-------------------------------------------------------------------------- + + GrB_Index nrows, ncols ; + int32_t typecode ; + GRB_TRY (GrB_Matrix_nrows (&nrows, Matrices [0])) ; + GRB_TRY (GrB_Matrix_ncols (&ncols, Matrices [0])) ; + GRB_TRY (GrB_get (Matrices [0], &typecode, GrB_EL_TYPE_CODE)) ; + + //-------------------------------------------------------------------------- + // validate every matrix and accumulate the total number of entries + //-------------------------------------------------------------------------- + + GrB_Index total = 0 ; + for (GrB_Index k = 0 ; k < nmatrices ; k++) + { + GrB_Matrix Ak = Matrices [k] ; + LG_ASSERT (Ak != NULL, GrB_NULL_POINTER) ; + GrB_Index r, c, n ; + int32_t code ; + GRB_TRY (GrB_Matrix_nrows (&r, Ak)) ; + GRB_TRY (GrB_Matrix_ncols (&c, Ak)) ; + LG_ASSERT_MSG (r == nrows && c == ncols, GrB_DIMENSION_MISMATCH, + "all input matrices must have the same dimensions") ; + GRB_TRY (GrB_get (Ak, &code, GrB_EL_TYPE_CODE)) ; + LG_ASSERT_MSG (code == typecode, GrB_DOMAIN_MISMATCH, + "all input matrices must have the same type") ; + GRB_TRY (GrB_Matrix_nvals (&n, Ak)) ; + total += n ; + } + + //-------------------------------------------------------------------------- + // allocate the shared row/column index buffers (guard against size 0) + //-------------------------------------------------------------------------- + + GrB_Index alloc = (total == 0) ? 1 : total ; + LG_TRY (LAGraph_Malloc ((void **) &I, alloc, sizeof (GrB_Index), msg)) ; + LG_TRY (LAGraph_Malloc ((void **) &J, alloc, sizeof (GrB_Index), msg)) ; + + //-------------------------------------------------------------------------- + // extract tuples from every matrix, then build the result + //-------------------------------------------------------------------------- + + // For each built-in type: allocate the value buffer X with the correct + // element size, extract the tuples of every input matrix into the shared + // buffer at the running offset, create C, and build it with the dup + // operator to combine duplicate (i,j) entries. + + #define LG_SUM_CASE(code, ctype, gtype, suffix) \ + case code : \ + { \ + ctype *Xt = NULL ; \ + LG_TRY (LAGraph_Malloc ((void **) &Xt, alloc, sizeof (ctype), \ + msg)) ; \ + X = (void *) Xt ; \ + GrB_Index offset = 0 ; \ + for (GrB_Index k = 0 ; k < nmatrices ; k++) \ + { \ + GrB_Index n, got ; \ + GRB_TRY (GrB_Matrix_nvals (&n, Matrices [k])) ; \ + if (n == 0) continue ; \ + got = n ; \ + GRB_TRY (GrB_Matrix_extractTuples_ ## suffix ( \ + I + offset, J + offset, Xt + offset, &got, \ + Matrices [k])) ; \ + offset += n ; \ + } \ + GRB_TRY (GrB_Matrix_new (C, gtype, nrows, ncols)) ; \ + GRB_TRY (GrB_Matrix_build_ ## suffix (*C, I, J, Xt, total, \ + dup)) ; \ + } \ + break ; + + switch (typecode) + { + LG_SUM_CASE (GrB_BOOL_CODE, bool, GrB_BOOL, BOOL ) + LG_SUM_CASE (GrB_INT8_CODE, int8_t, GrB_INT8, INT8 ) + LG_SUM_CASE (GrB_INT16_CODE, int16_t, GrB_INT16, INT16 ) + LG_SUM_CASE (GrB_INT32_CODE, int32_t, GrB_INT32, INT32 ) + LG_SUM_CASE (GrB_INT64_CODE, int64_t, GrB_INT64, INT64 ) + LG_SUM_CASE (GrB_UINT8_CODE, uint8_t, GrB_UINT8, UINT8 ) + LG_SUM_CASE (GrB_UINT16_CODE, uint16_t, GrB_UINT16, UINT16) + LG_SUM_CASE (GrB_UINT32_CODE, uint32_t, GrB_UINT32, UINT32) + LG_SUM_CASE (GrB_UINT64_CODE, uint64_t, GrB_UINT64, UINT64) + LG_SUM_CASE (GrB_FP32_CODE, float, GrB_FP32, FP32 ) + LG_SUM_CASE (GrB_FP64_CODE, double, GrB_FP64, FP64 ) + default : + LG_ASSERT_MSG (false, GrB_NOT_IMPLEMENTED, + "user-defined types are not supported") ; + } + + #undef LG_SUM_CASE + + //-------------------------------------------------------------------------- + // free workspace and return result + //-------------------------------------------------------------------------- + + LG_FREE_WORK ; + return (GrB_SUCCESS) ; +} From 1895a885bea2cfc9cae03430bce42d565b7d8244 Mon Sep 17 00:00:00 2001 From: Michel Pelletier Date: Thu, 11 Jun 2026 21:35:34 -0700 Subject: [PATCH 2/2] Parallelize tuple extraction in LAGraph_Matrix_Sum The per-matrix extraction loop precomputes an offset (prefix-sum) array so each matrix's tuples occupy a disjoint region of the shared (I, J, X) buffer. That removes the loop-carried offset dependency and lets the extraction run across LG_nthreads_outer threads with OpenMP; each GrB_Matrix_extractTuples is still parallelized internally by GraphBLAS with LG_nthreads_inner threads (the two-level nested model). The public signature is unchanged: the thread count follows the usual LAGraph convention via LAGraph_SetNumThreads. Because GRB_TRY cannot return out of an OpenMP region, the first extraction error is captured under a critical section and checked after the loop. Adds test_Matrix_Sum_parallel, which sums many overlapping matrices with multiple outer threads and compares against an independently accumulated result. --- src/test/test_Matrix_Sum.c | 59 ++++++++++++++++++++++++++ src/utility/LAGraph_Matrix_Sum.c | 71 ++++++++++++++++++++++---------- 2 files changed, 109 insertions(+), 21 deletions(-) diff --git a/src/test/test_Matrix_Sum.c b/src/test/test_Matrix_Sum.c index a46e78eb98..3863ced258 100644 --- a/src/test/test_Matrix_Sum.c +++ b/src/test/test_Matrix_Sum.c @@ -193,6 +193,64 @@ void test_Matrix_Sum_brutal (void) } #endif +//------------------------------------------------------------------------------ +// test_Matrix_Sum_parallel: exercise the parallel extraction path +//------------------------------------------------------------------------------ + +// Sum many matrices with multiple outer threads and confirm the result matches +// an independently accumulated expected. This stresses the disjoint-offset +// extraction under real outer parallelism. + +void test_Matrix_Sum_parallel (void) +{ + setup ( ) ; + + // request 4 outer threads (saving and restoring the prior settings) + int save_outer, save_inner ; + OK (LAGraph_GetNumThreads (&save_outer, &save_inner, msg)) ; + OK (LAGraph_SetNumThreads (4, save_inner, msg)) ; + + #define NMAT 16 + GrB_Matrix Mats [NMAT] ; + OK (GrB_Matrix_new (&Expected, GrB_FP64, 10, 10)) ; + + for (int k = 0 ; k < NMAT ; k++) + { + // each matrix has 3 distinct (i,j) entries; the (7,7) entry is shared + // by every matrix and others overlap across matrices, so duplicates + // must be summed when the matrices are combined + GrB_Index Mi [ ] = { (GrB_Index) (k % 10), 2, 7 } ; + GrB_Index Mj [ ] = { 3, (GrB_Index) (k % 10), 7 } ; + double Mx [ ] = { (double) (k + 1), 1, 2 } ; + Mats [k] = NULL ; + OK (GrB_Matrix_new (&Mats [k], GrB_FP64, 10, 10)) ; + OK (GrB_Matrix_build_FP64 (Mats [k], Mi, Mj, Mx, 3, NULL)) ; + // accumulate into Expected independently + OK (GrB_eWiseAdd (Expected, NULL, NULL, GrB_PLUS_FP64, Expected, + Mats [k], NULL)) ; + } + + OK (LAGraph_Matrix_Sum (&C, Mats, NMAT, GrB_PLUS_FP64, msg)) ; + + bool ok ; + OK (LAGraph_Matrix_IsEqual (&ok, C, Expected, msg)) ; + TEST_CHECK (ok) ; + TEST_MSG ("parallel sum of %d matrices did not match expected", NMAT) ; + + for (int k = 0 ; k < NMAT ; k++) + { + OK (GrB_free (&Mats [k])) ; + } + OK (GrB_free (&C)) ; + OK (GrB_free (&Expected)) ; + #undef NMAT + + // restore the original thread settings + OK (LAGraph_SetNumThreads (save_outer, save_inner, msg)) ; + + teardown ( ) ; +} + //------------------------------------------------------------------------------ // test_Matrix_Sum_failures: test error handling //------------------------------------------------------------------------------ @@ -266,6 +324,7 @@ TEST_LIST = { { "Matrix_Sum", test_Matrix_Sum }, { "Matrix_Sum_types", test_Matrix_Sum_types }, + { "Matrix_Sum_parallel", test_Matrix_Sum_parallel }, { "Matrix_Sum_failures", test_Matrix_Sum_failures }, #if LG_BRUTAL_TESTS { "Matrix_Sum_brutal", test_Matrix_Sum_brutal }, diff --git a/src/utility/LAGraph_Matrix_Sum.c b/src/utility/LAGraph_Matrix_Sum.c index 7532f8fb72..26dbf5f773 100644 --- a/src/utility/LAGraph_Matrix_Sum.c +++ b/src/utility/LAGraph_Matrix_Sum.c @@ -16,10 +16,14 @@ //------------------------------------------------------------------------------ // LAGraph_Matrix_Sum combines an array of matrices into a single matrix C. It -// computes the total number of entries across all inputs, allocates a single -// tuple buffer (I, J, X) large enough to hold every entry, extracts the tuples -// of each input matrix into that buffer, and then calls GrB_Matrix_build with -// the binary operator dup to combine any duplicate (i,j) entries. With dup = +// computes the total number of entries across all inputs and the offset at +// which each matrix's tuples begin in a single shared tuple buffer (I, J, X) +// large enough to hold every entry. Because each matrix writes to a disjoint +// region of that buffer, the per-matrix extraction is parallelized across +// LG_nthreads_outer threads with OpenMP; SuiteSparse:GraphBLAS parallelizes +// each GrB_Matrix_extractTuples internally with LG_nthreads_inner threads. The +// concatenated tuples are then passed to GrB_Matrix_build, using the binary +// operator dup to combine any duplicate (i,j) entries. With dup = // GrB_PLUS_FP64 (for example) this computes the element-wise sum of all input // matrices. @@ -31,6 +35,7 @@ LAGraph_Free ((void **) &I, NULL) ; \ LAGraph_Free ((void **) &J, NULL) ; \ LAGraph_Free ((void **) &X, NULL) ; \ + LAGraph_Free ((void **) &Offsets, NULL) ; \ } #define LG_FREE_ALL \ @@ -58,7 +63,7 @@ int LAGraph_Matrix_Sum //-------------------------------------------------------------------------- LG_CLEAR_MSG ; - GrB_Index *I = NULL, *J = NULL ; + GrB_Index *I = NULL, *J = NULL, *Offsets = NULL ; void *X = NULL ; LG_ASSERT_MSG (C != NULL, GrB_NULL_POINTER, "&C != NULL") ; LG_ASSERT (Matrices != NULL, GrB_NULL_POINTER) ; @@ -78,10 +83,17 @@ int LAGraph_Matrix_Sum GRB_TRY (GrB_get (Matrices [0], &typecode, GrB_EL_TYPE_CODE)) ; //-------------------------------------------------------------------------- - // validate every matrix and accumulate the total number of entries + // validate every matrix and compute where its tuples begin in the buffer //-------------------------------------------------------------------------- - GrB_Index total = 0 ; + // Offsets [k] is the position in (I, J, X) at which the tuples of matrix k + // begin; Offsets [k+1] - Offsets [k] is its number of entries. This prefix + // sum gives each matrix a disjoint buffer region so the extraction below + // can run in parallel without any data races. + + LG_TRY (LAGraph_Malloc ((void **) &Offsets, nmatrices + 1, + sizeof (GrB_Index), msg)) ; + Offsets [0] = 0 ; for (GrB_Index k = 0 ; k < nmatrices ; k++) { GrB_Matrix Ak = Matrices [k] ; @@ -96,8 +108,9 @@ int LAGraph_Matrix_Sum LG_ASSERT_MSG (code == typecode, GrB_DOMAIN_MISMATCH, "all input matrices must have the same type") ; GRB_TRY (GrB_Matrix_nvals (&n, Ak)) ; - total += n ; + Offsets [k+1] = Offsets [k] + n ; } + GrB_Index total = Offsets [nmatrices] ; //-------------------------------------------------------------------------- // allocate the shared row/column index buffers (guard against size 0) @@ -107,14 +120,25 @@ int LAGraph_Matrix_Sum LG_TRY (LAGraph_Malloc ((void **) &I, alloc, sizeof (GrB_Index), msg)) ; LG_TRY (LAGraph_Malloc ((void **) &J, alloc, sizeof (GrB_Index), msg)) ; + //-------------------------------------------------------------------------- + // determine the number of threads for the outer extraction loop + //-------------------------------------------------------------------------- + + int nthreads = LG_nthreads_outer ; + nthreads = LAGRAPH_MIN (nthreads, (int) nmatrices) ; + nthreads = LAGRAPH_MAX (nthreads, 1) ; + //-------------------------------------------------------------------------- // extract tuples from every matrix, then build the result //-------------------------------------------------------------------------- // For each built-in type: allocate the value buffer X with the correct - // element size, extract the tuples of every input matrix into the shared - // buffer at the running offset, create C, and build it with the dup - // operator to combine duplicate (i,j) entries. + // element size, extract the tuples of every input matrix into its disjoint + // region of the shared buffer (in parallel, since the regions never + // overlap), create C, and build it with the dup operator to combine + // duplicate (i,j) entries. GRB_TRY cannot be used inside an OpenMP region + // (it returns from the function), so the first error is captured into + // sum_status under a critical section and checked after the region. #define LG_SUM_CASE(code, ctype, gtype, suffix) \ case code : \ @@ -123,18 +147,23 @@ int LAGraph_Matrix_Sum LG_TRY (LAGraph_Malloc ((void **) &Xt, alloc, sizeof (ctype), \ msg)) ; \ X = (void *) Xt ; \ - GrB_Index offset = 0 ; \ - for (GrB_Index k = 0 ; k < nmatrices ; k++) \ + int sum_status = GrB_SUCCESS ; \ + int64_t k ; \ + _Pragma ("omp parallel for num_threads(nthreads) schedule(dynamic,1)") \ + for (k = 0 ; k < (int64_t) nmatrices ; k++) \ { \ - GrB_Index n, got ; \ - GRB_TRY (GrB_Matrix_nvals (&n, Matrices [k])) ; \ - if (n == 0) continue ; \ - got = n ; \ - GRB_TRY (GrB_Matrix_extractTuples_ ## suffix ( \ - I + offset, J + offset, Xt + offset, &got, \ - Matrices [k])) ; \ - offset += n ; \ + GrB_Index off = Offsets [k] ; \ + GrB_Index got = Offsets [k+1] - off ; \ + if (got == 0) continue ; \ + GrB_Info info = GrB_Matrix_extractTuples_ ## suffix ( \ + I + off, J + off, Xt + off, &got, Matrices [k]) ; \ + if (info < GrB_SUCCESS) \ + { \ + _Pragma ("omp critical") \ + { if (sum_status >= GrB_SUCCESS) sum_status = info ; } \ + } \ } \ + GRB_TRY (sum_status) ; \ GRB_TRY (GrB_Matrix_new (C, gtype, nrows, ncols)) ; \ GRB_TRY (GrB_Matrix_build_ ## suffix (*C, I, J, Xt, total, \ dup)) ; \