From fbdcf9a5fd0fa94baed57c9c5e89e737418c8a49 Mon Sep 17 00:00:00 2001
From: "Ryan M. Richard" <rrichard@ameslab.gov>
Date: Tue, 20 Jan 2026 11:53:15 -0600
Subject: [PATCH 1/3] works again

---
 src/integrals/ao_integrals/coulomb_metric.cpp | 38 +++++++-------
 src/integrals/ao_integrals/uq_driver.cpp      | 52 +++++++++++--------
 src/integrals/libint/libint.cpp               | 16 +++---
 .../ao_integrals/ao_integrals_driver.cpp      | 30 ++++++-----
 .../integrals/ao_integrals/test_uq_driver.cpp |  5 +-
 tests/cxx/unit/integrals/testing.hpp          |  5 +-
 6 files changed, 83 insertions(+), 63 deletions(-)
diff --git a/src/integrals/ao_integrals/coulomb_metric.cpp b/src/integrals/ao_integrals/coulomb_metric.cpp
index 36dac412..0e6122f1 100644
--- a/src/integrals/ao_integrals/coulomb_metric.cpp
+++ b/src/integrals/ao_integrals/coulomb_metric.cpp
@@ -24,38 +24,40 @@ using pt = simde::ERI2;
 namespace {
 
 struct Kernel {
+    using const_shape_view =
+      tensorwrapper::buffer::Contiguous::const_shape_view;
+
+    explicit Kernel(const_shape_view shape) : m_shape(shape) {}
+
     template<typename FloatType>
-    auto run(const tensorwrapper::buffer::BufferBase& I,
-             parallelzone::runtime::RuntimeView& rv) {
-        // Unwrap buffer info
-        tensorwrapper::allocator::Eigen<FloatType> allocator(rv);
-        const auto& eigen_I = allocator.rebind(I);
-        const auto* pI      = eigen_I.get_immutable_data();
-        const auto& shape_I = eigen_I.layout().shape().as_smooth();
-        auto rows           = shape_I.extent(0);
-        auto cols           = shape_I.extent(1);
+    simde::type::tensor operator()(const std::span<FloatType> I) {
+        using clean_type = std::decay_t<FloatType>;
+        auto rows        = m_shape.extent(0);
+        auto cols        = m_shape.extent(1);
 
         // Cholesky Decomp
         constexpr auto rmajor = Eigen::RowMajor;
         constexpr auto edynam = Eigen::Dynamic;
-        using matrix_type = Eigen::Matrix<FloatType, edynam, edynam, rmajor>;
+        using matrix_type = Eigen::Matrix<clean_type, edynam, edynam, rmajor>;
         using map_type    = Eigen::Map<const matrix_type>;
-        map_type I_map(pI, rows, cols);
+        map_type I_map(I.data(), rows, cols);
         Eigen::LLT<matrix_type> lltOfI(I_map);
         matrix_type U    = lltOfI.matrixU();
         matrix_type Linv = U.inverse().transpose();
 
         // Wrap result
         tensorwrapper::shape::Smooth matrix_shape{rows, cols};
-        tensorwrapper::layout::Physical matrix_layout(matrix_shape);
-        auto pM_buffer = allocator.allocate(matrix_layout);
+        auto pM_buffer =
+          tensorwrapper::buffer::make_contiguous<FloatType>(matrix_shape);
         for(decltype(rows) i = 0; i < rows; ++i) {
             for(decltype(cols) j = 0; j < cols; ++j) {
-                pM_buffer->set_elem({i, j}, Linv(i, j));
+                pM_buffer.set_elem({i, j}, Linv(i, j));
             }
         }
         return simde::type::tensor(matrix_shape, std::move(pM_buffer));
     }
+
+    const_shape_view m_shape;
 };
 
 auto desc = R"(
@@ -79,11 +81,9 @@ MODULE_RUN(CoulombMetric) {
     const auto& I = eri2_mod.run_as<pt>(braket);
 
     // Compute metric
-    using tensorwrapper::utilities::floating_point_dispatch;
-    auto r = get_runtime();
-    Kernel k;
-    const auto& I_buffer = I.buffer();
-    auto M               = floating_point_dispatch(k, I_buffer, r);
+    const auto& I_buffer = tensorwrapper::buffer::make_contiguous(I.buffer());
+    Kernel k(I_buffer.shape());
+    auto M = tensorwrapper::buffer::visit_contiguous_buffer(k, I_buffer);
 
     auto rv = results();
     return pt::wrap_results(rv, M);
diff --git a/src/integrals/ao_integrals/uq_driver.cpp b/src/integrals/ao_integrals/uq_driver.cpp
index afd7213a..5b309a85 100644
--- a/src/integrals/ao_integrals/uq_driver.cpp
+++ b/src/integrals/ao_integrals/uq_driver.cpp
@@ -16,36 +16,43 @@
 
 #include "ao_integrals.hpp"
 
+using namespace tensorwrapper;
+
 namespace integrals::ao_integrals {
 namespace {
 
 struct Kernel {
-    using buffer_base_type = tensorwrapper::buffer::BufferBase;
-    template<typename FloatType>
-    auto run(const buffer_base_type& t, const buffer_base_type& error) {
-        tensorwrapper::Tensor rv;
-
-        if constexpr(tensorwrapper::types::is_uncertain_v<FloatType>) {
-            using alloc_type = tensorwrapper::allocator::Eigen<FloatType>;
-            alloc_type alloc(t.allocator().runtime());
-
-            const auto& t_eigen     = alloc.rebind(t);
-            const auto& error_eigen = alloc.rebind(error);
+    using shape_type = buffer::Contiguous::shape_type;
+    Kernel(shape_type shape) : m_shape(std::move(shape)) {}
+
+    template<typename FloatType0, typename FloatType1>
+    Tensor operator()(const std::span<FloatType0> t,
+                      const std::span<FloatType1> error) {
+        throw std::runtime_error(
+          "UQ Integrals Driver kernel only supports same float types");
+    }
 
-            auto rv_buffer = alloc.allocate(t_eigen.layout());
-            for(std::size_t i = 0; i < t_eigen.size(); ++i) {
-                const auto elem       = t_eigen.get_data(i).mean();
-                const auto elem_error = error_eigen.get_data(i).mean();
-                rv_buffer->set_data(i, FloatType(elem, elem_error));
+    template<typename FloatType>
+    auto operator()(const std::span<FloatType> t,
+                    const std::span<FloatType> error) {
+        Tensor rv;
+
+        if constexpr(types::is_uncertain_v<FloatType>) {
+            auto rv_buffer = buffer::make_contiguous<FloatType>(m_shape);
+            auto rv_data   = buffer::get_raw_data<FloatType>(rv_buffer);
+            for(std::size_t i = 0; i < t.size(); ++i) {
+                const auto elem       = t[i].mean();
+                const auto elem_error = error[i].mean();
+                rv_data[i]            = FloatType(elem, elem_error);
             }
 
-            const auto& shape = t_eigen.layout().shape();
-            rv = tensorwrapper::Tensor(shape, std::move(rv_buffer));
+            rv = tensorwrapper::Tensor(m_shape, std::move(rv_buffer));
         } else {
             throw std::runtime_error("Expected an uncertain type");
         }
         return rv;
     }
+    shape_type m_shape;
 };
 
 const auto desc = R"(
@@ -86,9 +93,12 @@ MODULE_RUN(UQDriver) {
     simde::type::tensor error;
     error("m,n,l,s") = t("m,n,l,s") - t_0("m,n,l,s");
 
-    using tensorwrapper::utilities::floating_point_dispatch;
-    Kernel k;
-    auto t_w_error = floating_point_dispatch(k, t.buffer(), error.buffer());
+    using buffer::visit_contiguous_buffer;
+    shape::Smooth shape = t.buffer().layout().shape().as_smooth().make_smooth();
+    Kernel k(shape);
+    auto t_buffer     = make_contiguous(t.buffer());
+    auto error_buffer = make_contiguous(error.buffer());
+    auto t_w_error    = visit_contiguous_buffer(k, t_buffer, error_buffer);
 
     auto rv = results();
     return eri_pt::wrap_results(rv, t_w_error);
diff --git a/src/integrals/libint/libint.cpp b/src/integrals/libint/libint.cpp
index 7cd0c278..185c6886 100644
--- a/src/integrals/libint/libint.cpp
+++ b/src/integrals/libint/libint.cpp
@@ -60,13 +60,13 @@ auto build_eigen_buffer(const std::vector<libint2::BasisSet>& basis_sets,
     std::vector<decltype(N)> dims(N);
     for(decltype(N) i = 0; i < N; ++i) dims[i] = basis_sets[i].nbf();
 
-    using shape_t  = tensorwrapper::shape::Smooth;
-    using layout_t = tensorwrapper::layout::Physical;
+    using namespace tensorwrapper;
+    using shape_t  = shape::Smooth;
+    using layout_t = layout::Physical;
 
     shape_t s{dims.begin(), dims.end()};
-    layout_t l(s);
-    tensorwrapper::allocator::Eigen<FloatType> alloc(rv);
-    return alloc.construct(l, initial_value);
+    auto buffer = buffer::make_contiguous<FloatType>(s, initial_value);
+    return std::make_unique<buffer::Contiguous>(std::move(buffer));
 }
 
 template<std::size_t N, typename FloatType>
@@ -98,6 +98,8 @@ auto fill_tensor(const std::vector<libint2::BasisSet>& basis_sets,
 #ifdef _OPENMP
 #pragma omp parallel for
 #endif
+    auto data = pbuffer->get_mutable_data();
+    auto span = wtf::buffer::contiguous_buffer_cast<FloatType>(data);
     for(size_type i_pair = 0; i_pair != num_shell_combinations; ++i_pair) {
         auto thread_id = get_thread_num();
 
@@ -117,8 +119,8 @@ auto fill_tensor(const std::vector<libint2::BasisSet>& basis_sets,
             auto ord   = detail_::shells2ord(basis_sets, shells);
             auto n_ord = ord.size();
             for(decltype(n_ord) i_ord = 0; i_ord < n_ord; ++i_ord) {
-                auto update = pbuffer->get_data(ord[i_ord]) + vals[i_ord];
-                pbuffer->set_data(ord[i_ord], update);
+                auto update      = span[ord[i_ord]] + vals[i_ord];
+                span[ord[i_ord]] = update;
             }
         }
     }
diff --git a/tests/cxx/unit/integrals/ao_integrals/ao_integrals_driver.cpp b/tests/cxx/unit/integrals/ao_integrals/ao_integrals_driver.cpp
index ae0f5473..479375d3 100644
--- a/tests/cxx/unit/integrals/ao_integrals/ao_integrals_driver.cpp
+++ b/tests/cxx/unit/integrals/ao_integrals/ao_integrals_driver.cpp
@@ -21,20 +21,26 @@ using simde::type::tensor;
 namespace {
 
 void compare_matrices(const tensor& A, const tensor& A_corr) {
-    using alloc_type          = tensorwrapper::allocator::Eigen<double>;
-    const auto& A_buffer      = alloc_type::rebind(A.buffer());
-    const auto& A_corr_buffer = alloc_type::rebind(A_corr.buffer());
+    using namespace tensorwrapper::buffer;
+    using namespace wtf::fp;
+    const auto& A_buffer      = make_contiguous(A.buffer());
+    const auto& A_corr_buffer = make_contiguous(A_corr.buffer());
 
     const auto tol = 1E-6;
-    auto A00       = A_buffer.get_elem({0, 0});
-    auto A01       = A_buffer.get_elem({0, 1});
-    auto A10       = A_buffer.get_elem({1, 0});
-    auto A11       = A_buffer.get_elem({1, 1});
-
-    REQUIRE(A00 == Catch::Approx(A_corr_buffer.get_elem({0, 0})).margin(tol));
-    REQUIRE(A01 == Catch::Approx(A_corr_buffer.get_elem({0, 1})).margin(tol));
-    REQUIRE(A10 == Catch::Approx(A_corr_buffer.get_elem({1, 0})).margin(tol));
-    REQUIRE(A11 == Catch::Approx(A_corr_buffer.get_elem({1, 1})).margin(tol));
+    using fp_type  = double;
+    auto A00       = float_cast<fp_type>(A_buffer.get_elem({0, 0}));
+    auto A01       = float_cast<fp_type>(A_buffer.get_elem({0, 1}));
+    auto A10       = float_cast<fp_type>(A_buffer.get_elem({1, 0}));
+    auto A11       = float_cast<fp_type>(A_buffer.get_elem({1, 1}));
+    auto A00_corr  = float_cast<fp_type>(A_corr_buffer.get_elem({0, 0}));
+    auto A01_corr  = float_cast<fp_type>(A_corr_buffer.get_elem({0, 1}));
+    auto A10_corr  = float_cast<fp_type>(A_corr_buffer.get_elem({1, 0}));
+    auto A11_corr  = float_cast<fp_type>(A_corr_buffer.get_elem({1, 1}));
+
+    REQUIRE(A00 == Catch::Approx(A00_corr).margin(tol));
+    REQUIRE(A01 == Catch::Approx(A01_corr).margin(tol));
+    REQUIRE(A10 == Catch::Approx(A10_corr).margin(tol));
+    REQUIRE(A11 == Catch::Approx(A11_corr).margin(tol));
 }
 
 } // namespace
diff --git a/tests/cxx/unit/integrals/ao_integrals/test_uq_driver.cpp b/tests/cxx/unit/integrals/ao_integrals/test_uq_driver.cpp
index 37a8945b..055f1280 100644
--- a/tests/cxx/unit/integrals/ao_integrals/test_uq_driver.cpp
+++ b/tests/cxx/unit/integrals/ao_integrals/test_uq_driver.cpp
@@ -16,14 +16,15 @@
 
 #include "../testing.hpp"
 
+using namespace tensorwrapper;
+
 template<typename FloatType>
 auto corr_answer(const simde::type::tensor& T) {
     if constexpr(std::is_same_v<FloatType, double>) {
         return T;
     } else {
         simde::type::tensor T_corr(T);
-        using alloc_type  = tensorwrapper::allocator::Eigen<FloatType>;
-        auto& corr_buffer = alloc_type::rebind(T_corr.buffer());
+        auto& corr_buffer = buffer::make_contiguous(T_corr.buffer());
         corr_buffer.set_elem({0, 0, 0, 0}, FloatType{0.774606, 0});
         corr_buffer.set_elem({0, 0, 0, 1}, FloatType{0.265558, 2.49687e-06});
         corr_buffer.set_elem({0, 0, 1, 0}, FloatType{0.265558, 2.49687e-06});
diff --git a/tests/cxx/unit/integrals/testing.hpp b/tests/cxx/unit/integrals/testing.hpp
index d1f4d338..812b05c3 100644
--- a/tests/cxx/unit/integrals/testing.hpp
+++ b/tests/cxx/unit/integrals/testing.hpp
@@ -31,9 +31,10 @@ namespace test {
 template<typename FloatType, std::size_t N, std::size_t... Is>
 auto eigen_tensor_(const tensorwrapper::buffer::BufferBase& buffer,
                    std::array<int, N> extents, std::index_sequence<Is...>) {
-    const auto& b = tensorwrapper::allocator::Eigen<FloatType>::rebind(buffer);
+    using namespace tensorwrapper;
+    const auto pdata = buffer::get_raw_data<FloatType>(buffer);
     using eigen_type = Eigen::Tensor<const FloatType, N, Eigen::RowMajor>;
-    return Eigen::TensorMap<eigen_type>(b.get_immutable_data(), extents[Is]...);
+    return Eigen::TensorMap<eigen_type>(pdata.data(), extents[Is]...);
 }
 
 // Checking eigen outputs

From c266aa5e2a5423d43d09ddabfd62424b467c4b83 Mon Sep 17 00:00:00 2001
From: "Ryan M. Richard" <rrichard@ameslab.gov>
Date: Fri, 30 Jan 2026 11:45:02 -0600
Subject: [PATCH 2/3] remove unused typedef

---
 src/integrals/libint/libint.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/integrals/libint/libint.cpp b/src/integrals/libint/libint.cpp
index 185c6886..997fe576 100644
--- a/src/integrals/libint/libint.cpp
+++ b/src/integrals/libint/libint.cpp
@@ -61,8 +61,7 @@ auto build_eigen_buffer(const std::vector<libint2::BasisSet>& basis_sets,
     for(decltype(N) i = 0; i < N; ++i) dims[i] = basis_sets[i].nbf();
 
     using namespace tensorwrapper;
-    using shape_t  = shape::Smooth;
-    using layout_t = layout::Physical;
+    using shape_t = shape::Smooth;
 
     shape_t s{dims.begin(), dims.end()};
     auto buffer = buffer::make_contiguous<FloatType>(s, initial_value);

From 1a195901a07f670a83518566e188a69e8acc4ea5 Mon Sep 17 00:00:00 2001
From: "Ryan M. Richard" <rrichard@ameslab.gov>
Date: Fri, 30 Jan 2026 12:36:10 -0600
Subject: [PATCH 3/3] avoid breaking up the omp parallel for

---
 src/integrals/libint/libint.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/integrals/libint/libint.cpp b/src/integrals/libint/libint.cpp
index 997fe576..d391c2a0 100644
--- a/src/integrals/libint/libint.cpp
+++ b/src/integrals/libint/libint.cpp
@@ -94,11 +94,11 @@ auto fill_tensor(const std::vector<libint2::BasisSet>& basis_sets,
 
     // Fill in values
     auto pbuffer = build_eigen_buffer<FloatType>(basis_sets, rv, thresh);
+    auto data    = pbuffer->get_mutable_data();
+    auto span    = wtf::buffer::contiguous_buffer_cast<FloatType>(data);
 #ifdef _OPENMP
 #pragma omp parallel for
 #endif
-    auto data = pbuffer->get_mutable_data();
-    auto span = wtf::buffer::contiguous_buffer_cast<FloatType>(data);
     for(size_type i_pair = 0; i_pair != num_shell_combinations; ++i_pair) {
         auto thread_id = get_thread_num();