feat: expand astype dtype support — float32, int64, bool bidirectional

peng.li24 · peng.li24 · commit 95986fd4fa44 · 2026-06-03T00:43:47.000+08:00
Supported conversions (5 dtypes × 4 targets = 20 combos):
  float64 → float32, int32, int64, bool
  float32 → float64, int32, int64, bool
  int32 → float64, float32, int64, bool
  int64 → float64, float32, int32, bool
  bool → float64, float32, int32, int64

Add 7 new tests: f64→f32, f32→f64, f64→int64,
  int32→f64, int32→f32, bool→f64, bool→int32

Test count: 468 → 475
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -8,7 +8,7 @@ on:
     branches: [master]
 
 jobs:
-  # ---- Test: build module + run 468 precision tests --------------------------
+  # ---- Test: build module + run 475 precision tests --------------------------
   test:
     runs-on: ubuntu-22.04
     steps:
diff --git a/README.md b/README.md
@@ -15,7 +15,7 @@ We created `numpycpp` to keep NumPy's familiar usage patterns while letting C++
 
 `numpycpp` is a **header-only C++ library** implementing numpy's core API (`numpy.*`, `numpy.linalg.*`, `numpy.einsum`) with **bit-level precision alignment**. Raw pointer + size interface. Zero external dependencies — pure C++17 standard library.
 
-All APIs are tested against Python numpy under strict bit-level comparison: every IEEE 754 float bit must match exactly (468 tests, float64 + float32).
+All APIs are tested against Python numpy under strict bit-level comparison: every IEEE 754 float bit must match exactly (475 tests, float64 + float32).
 
 **Bit-exact math** is achieved by resolving numpy's own math functions from `_multiarray_umath.so` at runtime. The SVML bridge auto-detects your CPU and selects the same path numpy uses: AVX‑512 SVML (`__svml_exp8`) when available, or scalar `npy_exp`/`npy_log`/etc. otherwise. AVX‑512 intrinsics are isolated behind `__attribute__((target))` — the binary is safe on any x86_64 CPU (no SIGILL). Every transcendental function produces the exact same IEEE 754 bits as numpy on **all architectures**.
 
@@ -89,12 +89,12 @@ Add `-Ipath/to/numpycpp` to your compiler flags and include the headers directly
 ### Testing
 
 The test suite verifies **bit-level precision alignment** between every C++ function and Python numpy.
-No tolerance, no `atol`/`rtol` — raw IEEE 754 bits must match exactly. 468 tests, float64 + float32.
+No tolerance, no `atol`/`rtol` — raw IEEE 754 bits must match exactly. 475 tests, float64 + float32.
 
 ```bash
 cd tests
 make                    # compile C++ test module
-make test               # run all 468 tests (silent mode: only failures print)
+make test               # run all 475 tests (silent mode: only failures print)
 ```
 
 To run with verbose output:
@@ -142,7 +142,7 @@ LDFLAGS   = -shared -ldl
 ### Alignment status
 
 The table below reflects the current bit-level parity between `numpycpp` C++ and Python numpy.
-All 468 tests pass under strict IEEE 754 bit comparison (float64 + float32).
+All 475 tests pass under strict IEEE 754 bit comparison (float64 + float32).
 
 ✅ = bit-exact on ALL architectures (SVML bridge with runtime CPU dispatch).
 
@@ -189,7 +189,7 @@ numpycpp/
 │   └── einsum_py.h
 ├── tests/              # bit-level precision tests + test module
 │   ├── module.cpp      # pybind11 module for testing
-│   ├── test_all.py     # single entry — all APIs, 468 tests, float64+float32
+│   ├── test_all.py     # single entry — all APIs, 475 tests, float64+float32
 │   ├── conftest.py     # silent-mode output suppression
 │   └── Makefile
 ├── CMakeLists.txt      # build & .deb packaging
diff --git a/pycpp/core_py.h b/pycpp/core_py.h
@@ -11,6 +11,7 @@
 #include "../numpy/core.h"
 #include <vector>
 #include <cstring>
+#include <cstdint>
 
 namespace py = pybind11;
 
@@ -116,30 +117,132 @@ inline py::array ones_like(const py::array& arr, const std::string& dtype) {
 inline py::array astype(const py::array& arr, const std::string& dtype) {
     auto buf = arr.request();
     auto dt = arr.dtype();
+
     // float64 input
     if (dt.is(py::dtype::of<double>())) {
-        if (dtype == "int" || dtype == "int32" || dtype == "int64") {
-            py::array_t<int> result(buf.shape);
-            astype<int, double>(static_cast<const double*>(buf.ptr),
-                                static_cast<int*>(result.request().ptr), buf.size);
-            return result;
+        auto* src = static_cast<const double*>(buf.ptr);
+        if (dtype == "float32" || dtype == "float") {
+            py::array_t<float> r(buf.shape);
+            astype<float, double>(src, static_cast<float*>(r.request().ptr), buf.size);
+            return r;
+        }
+        if (dtype == "int" || dtype == "int32") {
+            py::array_t<int> r(buf.shape);
+            astype<int, double>(src, static_cast<int*>(r.request().ptr), buf.size);
+            return r;
+        }
+        if (dtype == "int64") {
+            py::array_t<int64_t> r(buf.shape);
+            astype<int64_t, double>(src, static_cast<int64_t*>(r.request().ptr), buf.size);
+            return r;
+        }
+        if (dtype == "bool") {
+            py::array_t<bool> r(buf.shape);
+            astype<bool, double>(src, static_cast<bool*>(r.request().ptr), buf.size);
+            return r;
+        }
+    }
+
+    // float32 input
+    if (dt.is(py::dtype::of<float>())) {
+        auto* src = static_cast<const float*>(buf.ptr);
+        if (dtype == "float64" || dtype == "double") {
+            py::array_t<double> r(buf.shape);
+            astype<double, float>(src, static_cast<double*>(r.request().ptr), buf.size);
+            return r;
+        }
+        if (dtype == "int" || dtype == "int32") {
+            py::array_t<int> r(buf.shape);
+            astype<int, float>(src, static_cast<int*>(r.request().ptr), buf.size);
+            return r;
+        }
+        if (dtype == "int64") {
+            py::array_t<int64_t> r(buf.shape);
+            astype<int64_t, float>(src, static_cast<int64_t*>(r.request().ptr), buf.size);
+            return r;
         }
         if (dtype == "bool") {
-            py::array_t<bool> result(buf.shape);
-            astype<bool, double>(static_cast<const double*>(buf.ptr),
-                                 static_cast<bool*>(result.request().ptr), buf.size);
-            return result;
+            py::array_t<bool> r(buf.shape);
+            astype<bool, float>(src, static_cast<bool*>(r.request().ptr), buf.size);
+            return r;
         }
     }
-    // int input
+
+    // int32 input
     if (dt.is(py::dtype::of<int>())) {
+        auto* src = static_cast<const int*>(buf.ptr);
+        if (dtype == "float64" || dtype == "double") {
+            py::array_t<double> r(buf.shape);
+            astype<double, int>(src, static_cast<double*>(r.request().ptr), buf.size);
+            return r;
+        }
+        if (dtype == "float32" || dtype == "float") {
+            py::array_t<float> r(buf.shape);
+            astype<float, int>(src, static_cast<float*>(r.request().ptr), buf.size);
+            return r;
+        }
+        if (dtype == "int64") {
+            py::array_t<int64_t> r(buf.shape);
+            astype<int64_t, int>(src, static_cast<int64_t*>(r.request().ptr), buf.size);
+            return r;
+        }
         if (dtype == "bool") {
-            py::array_t<bool> result(buf.shape);
-            astype<bool, int>(static_cast<const int*>(buf.ptr),
-                              static_cast<bool*>(result.request().ptr), buf.size);
-            return result;
+            py::array_t<bool> r(buf.shape);
+            astype<bool, int>(src, static_cast<bool*>(r.request().ptr), buf.size);
+            return r;
         }
     }
+
+    // int64 input
+    if (dt.is(py::dtype::of<int64_t>())) {
+        auto* src = static_cast<const int64_t*>(buf.ptr);
+        if (dtype == "float64" || dtype == "double") {
+            py::array_t<double> r(buf.shape);
+            astype<double, int64_t>(src, static_cast<double*>(r.request().ptr), buf.size);
+            return r;
+        }
+        if (dtype == "float32" || dtype == "float") {
+            py::array_t<float> r(buf.shape);
+            astype<float, int64_t>(src, static_cast<float*>(r.request().ptr), buf.size);
+            return r;
+        }
+        if (dtype == "int" || dtype == "int32") {
+            py::array_t<int> r(buf.shape);
+            astype<int, int64_t>(src, static_cast<int*>(r.request().ptr), buf.size);
+            return r;
+        }
+        if (dtype == "bool") {
+            py::array_t<bool> r(buf.shape);
+            astype<bool, int64_t>(src, static_cast<bool*>(r.request().ptr), buf.size);
+            return r;
+        }
+    }
+
+    // bool input
+    if (dt.is(py::dtype::of<bool>())) {
+        auto* src = static_cast<const bool*>(buf.ptr);
+        if (dtype == "float64" || dtype == "double") {
+            py::array_t<double> r(buf.shape);
+            astype<double, bool>(src, static_cast<double*>(r.request().ptr), buf.size);
+            return r;
+        }
+        if (dtype == "float32" || dtype == "float") {
+            py::array_t<float> r(buf.shape);
+            astype<float, bool>(src, static_cast<float*>(r.request().ptr), buf.size);
+            return r;
+        }
+        if (dtype == "int" || dtype == "int32") {
+            py::array_t<int> r(buf.shape);
+            astype<int, bool>(src, static_cast<int*>(r.request().ptr), buf.size);
+            return r;
+        }
+        if (dtype == "int64") {
+            py::array_t<int64_t> r(buf.shape);
+            astype<int64_t, bool>(src, static_cast<int64_t*>(r.request().ptr), buf.size);
+            return r;
+        }
+    }
+
     throw std::runtime_error("astype: unsupported conversion " + std::string(py::str(dt)) + " -> " + dtype);
 }
 
diff --git a/tests/test_all.py b/tests/test_all.py
@@ -471,6 +471,34 @@ def test_astype_bool_from_int(cpp):
     a = np.array([[0, 1, -1], [42, 0, 0]], dtype=np.int32)
     assert_bit_aligned(cpp.astype(a, "bool"), a.astype(bool), "astype_bool_from_int")
 
+def test_astype_f64_to_f32(cpp):
+    a = np.array([1.5, 2.7, -3.1], dtype=np.float64)
+    assert_bit_aligned(cpp.astype(a, "float32"), a.astype(np.float32), "astype_f64_to_f32")
+
+def test_astype_f32_to_f64(cpp):
+    a = np.array([1.5, 2.7, -3.1], dtype=np.float32)
+    assert_bit_aligned(cpp.astype(a, "float64"), a.astype(np.float64), "astype_f32_to_f64")
+
+def test_astype_f64_to_int64(cpp):
+    a = np.array([1.5, 2.7, -3.1], dtype=np.float64)
+    assert_bit_aligned(cpp.astype(a, "int64"), a.astype(np.int64), "astype_f64_to_int64")
+
+def test_astype_int_to_f64(cpp):
+    a = np.array([1, 2, -3], dtype=np.int32)
+    assert_bit_aligned(cpp.astype(a, "float64"), a.astype(np.float64), "astype_int_to_f64")
+
+def test_astype_int_to_f32(cpp):
+    a = np.array([1, 2, -3], dtype=np.int32)
+    assert_bit_aligned(cpp.astype(a, "float32"), a.astype(np.float32), "astype_int_to_f32")
+
+def test_astype_bool_to_f64(cpp):
+    a = np.array([True, False, True], dtype=bool)
+    assert_bit_aligned(cpp.astype(a, "float64"), a.astype(np.float64), "astype_bool_to_f64")
+
+def test_astype_bool_to_int(cpp):
+    a = np.array([True, False, True, False], dtype=bool)
+    assert_bit_aligned(cpp.astype(a, "int"), a.astype(np.int32), "astype_bool_to_int")
+
 def test_truncate_to_float32(cpp):
     a = np.array([1.0 / 3.0, np.pi, np.sqrt(2.0)], dtype=np.float64)
     py_r = a.astype(np.float32).astype(np.float64)