diff --git a/src/csrc/umath/matmul.cpp b/src/csrc/umath/matmul.cpp
index 70a82bd..60649d9 100644
--- a/src/csrc/umath/matmul.cpp
+++ b/src/csrc/umath/matmul.cpp
@@ -20,6 +20,7 @@ extern "C" {
 #include "scalar.h"
 #include "dtype.h"
 #include "ops.hpp"
+#include "constants.hpp"
 #include "umath/matmul.h"
 #include "umath/promoters.hpp"
 #include "quadblas_interface.h"
@@ -32,25 +33,23 @@ quad_matmul_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtypes[
     QuadPrecDTypeObject *descr_in1 = (QuadPrecDTypeObject *)given_descrs[0];
     QuadPrecDTypeObject *descr_in2 = (QuadPrecDTypeObject *)given_descrs[1];
 
-    // QBLAS only supports SLEEF backend
+    static const char *non_sleef_msg =
+            "QBLAS-accelerated quad ufuncs only support the SLEEF backend. "
+            "Please raise the issue at SwayamInSync/QBLAS for longdouble support";
+
     if (descr_in1->backend != BACKEND_SLEEF || descr_in2->backend != BACKEND_SLEEF) {
-        PyErr_SetString(PyExc_NotImplementedError,
-                        "QBLAS-accelerated matmul only supports SLEEF backend. "
-                        "Please raise the issue at SwayamInSync/QBLAS for longdouble support");
+        PyErr_SetString(PyExc_NotImplementedError, non_sleef_msg);
         return (NPY_CASTING)-1;
     }
 
-    // Both inputs must use SLEEF backend
     QuadBackendType target_backend = BACKEND_SLEEF;
     NPY_CASTING casting = NPY_NO_CASTING;
 
-    // Set up input descriptors
     for (int i = 0; i < 2; i++) {
         Py_INCREF(given_descrs[i]);
         loop_descrs[i] = given_descrs[i];
     }
 
-    // Set up output descriptor
     if (given_descrs[2] == NULL) {
         loop_descrs[2] = (PyArray_Descr *)new_quaddtype_instance(target_backend);
         if (!loop_descrs[2]) {
@@ -60,9 +59,7 @@ quad_matmul_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtypes[
     else {
         QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)given_descrs[2];
         if (descr_out->backend != target_backend) {
-            PyErr_SetString(PyExc_NotImplementedError,
-                        "QBLAS-accelerated matmul only supports SLEEF backend. "
-                        "Please raise the issue at SwayamInSync/QBLAS for longdouble support");
+            PyErr_SetString(PyExc_NotImplementedError, non_sleef_msg);
             return (NPY_CASTING)-1;
         }
         else {
@@ -126,8 +123,8 @@ quad_matmul_strided_loop_aligned(PyArrayMethod_Context *context, char *const dat
     }
 
     MatmulOperationType op_type = determine_operation_type(m, n, p);
-    Sleef_quad alpha = Sleef_cast_from_doubleq1(1.0);
-    Sleef_quad beta = Sleef_cast_from_doubleq1(0.0);
+    Sleef_quad alpha = QUAD_PRECISION_ONE;
+    Sleef_quad beta = QUAD_PRECISION_ZERO;
 
     char *A = data[0];
     char *B = data[1];
@@ -216,8 +213,8 @@ quad_matmul_strided_loop_unaligned(PyArrayMethod_Context *context, char *const d
     }
 
     MatmulOperationType op_type = determine_operation_type(m, n, p);
-    Sleef_quad alpha = Sleef_cast_from_doubleq1(1.0);
-    Sleef_quad beta = Sleef_cast_from_doubleq1(0.0);
+    Sleef_quad alpha = QUAD_PRECISION_ONE;
+    Sleef_quad beta = QUAD_PRECISION_ZERO;
 
     char *A = data[0];
     char *B = data[1];
@@ -315,6 +312,157 @@ quad_matmul_strided_loop_unaligned(PyArrayMethod_Context *context, char *const d
     return 0;
 }
 
+// vecdot: signature (n),(n)->()
+
+static int
+quad_vecdot_strided_loop_aligned(PyArrayMethod_Context *context, char *const data[],
+                                 npy_intp const dimensions[], npy_intp const strides[],
+                                 NpyAuxData *auxdata)
+{
+    npy_intp N = dimensions[0];  // outer (broadcast) loop length
+    npy_intp n = dimensions[1];  // core dim length
+
+    npy_intp x_outer_stride = strides[0];
+    npy_intp y_outer_stride = strides[1];
+    npy_intp out_outer_stride = strides[2];
+    npy_intp x_n_stride = strides[3];
+    npy_intp y_n_stride = strides[4];
+
+    char *x = data[0];
+    char *y = data[1];
+    char *out = data[2];
+
+    size_t incx = x_n_stride / sizeof(Sleef_quad);
+    size_t incy = y_n_stride / sizeof(Sleef_quad);
+
+    for (npy_intp i = 0; i < N; i++) {
+        Sleef_quad *x_ptr = (Sleef_quad *)x;
+        Sleef_quad *y_ptr = (Sleef_quad *)y;
+        Sleef_quad *out_ptr = (Sleef_quad *)out;
+
+        if (n == 0) {
+            *out_ptr = QUAD_PRECISION_ZERO;
+        }
+        else {
+            int result = qblas_dot(n, x_ptr, incx, y_ptr, incy, out_ptr);
+            if (result != 0) {
+                PyErr_SetString(PyExc_RuntimeError, "QBLAS vecdot operation failed");
+                return -1;
+            }
+        }
+
+        x += x_outer_stride;
+        y += y_outer_stride;
+        out += out_outer_stride;
+    }
+
+    return 0;
+}
+
+static int
+quad_vecdot_strided_loop_unaligned(PyArrayMethod_Context *context, char *const data[],
+                                   npy_intp const dimensions[], npy_intp const strides[],
+                                   NpyAuxData *auxdata)
+{
+    npy_intp N = dimensions[0];
+    npy_intp n = dimensions[1];
+
+    npy_intp x_outer_stride = strides[0];
+    npy_intp y_outer_stride = strides[1];
+    npy_intp out_outer_stride = strides[2];
+
+    char *x = data[0];
+    char *y = data[1];
+    char *out = data[2];
+
+    if (n == 0) {
+        Sleef_quad zero = QUAD_PRECISION_ZERO;
+        for (npy_intp i = 0; i < N; i++) {
+            memcpy(out + i * out_outer_stride, &zero, sizeof(Sleef_quad));
+        }
+        return 0;
+    }
+
+    Sleef_quad *temp_x = new Sleef_quad[n];
+    Sleef_quad *temp_y = new Sleef_quad[n];
+
+    int result = 0;
+    for (npy_intp i = 0; i < N; i++) {
+        memcpy(temp_x, x, n * sizeof(Sleef_quad));
+        memcpy(temp_y, y, n * sizeof(Sleef_quad));
+
+        Sleef_quad sum;
+        result = qblas_dot(n, temp_x, 1, temp_y, 1, &sum);
+        if (result != 0) break;
+
+        memcpy(out, &sum, sizeof(Sleef_quad));
+
+        x += x_outer_stride;
+        y += y_outer_stride;
+        out += out_outer_stride;
+    }
+
+    delete[] temp_x;
+    delete[] temp_y;
+
+    if (result != 0) {
+        PyErr_SetString(PyExc_RuntimeError, "QBLAS vecdot operation failed");
+        return -1;
+    }
+    return 0;
+}
+
+static int
+naive_vecdot_strided_loop(PyArrayMethod_Context *context, char *const data[],
+                          npy_intp const dimensions[], npy_intp const strides[],
+                          NpyAuxData *auxdata)
+{
+    npy_intp N = dimensions[0];
+    npy_intp n = dimensions[1];
+
+    npy_intp x_outer_stride = strides[0];
+    npy_intp y_outer_stride = strides[1];
+    npy_intp out_outer_stride = strides[2];
+    npy_intp x_n_stride = strides[3];
+    npy_intp y_n_stride = strides[4];
+
+    QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0];
+    QuadBackendType backend = descr->backend;
+
+    char *x = data[0];
+    char *y = data[1];
+    char *out = data[2];
+
+    for (npy_intp i = 0; i < N; i++) {
+        if (backend == BACKEND_SLEEF) {
+            Sleef_quad sum = QUAD_PRECISION_ZERO;
+            for (npy_intp k = 0; k < n; k++) {
+                Sleef_quad a_val, b_val;
+                memcpy(&a_val, x + k * x_n_stride, sizeof(Sleef_quad));
+                memcpy(&b_val, y + k * y_n_stride, sizeof(Sleef_quad));
+                sum = Sleef_fmaq1_u05(a_val, b_val, sum);
+            }
+            memcpy(out, &sum, sizeof(Sleef_quad));
+        }
+        else {
+            long double sum = 0.0L;
+            for (npy_intp k = 0; k < n; k++) {
+                long double a_val, b_val;
+                memcpy(&a_val, x + k * x_n_stride, sizeof(long double));
+                memcpy(&b_val, y + k * y_n_stride, sizeof(long double));
+                sum += a_val * b_val;
+            }
+            memcpy(out, &sum, sizeof(long double));
+        }
+
+        x += x_outer_stride;
+        y += y_outer_stride;
+        out += out_outer_stride;
+    }
+
+    return 0;
+}
+
 static int
 naive_matmul_strided_loop(PyArrayMethod_Context *context, char *const data[],
                           npy_intp const dimensions[], npy_intp const strides[],
@@ -349,7 +497,7 @@ naive_matmul_strided_loop(PyArrayMethod_Context *context, char *const data[],
             char *C_ij = C + i * C_row_stride + j * C_col_stride;
 
             if (backend == BACKEND_SLEEF) {
-                Sleef_quad sum = Sleef_cast_from_doubleq1(0.0);
+                Sleef_quad sum = QUAD_PRECISION_ZERO;
 
                 for (npy_intp k = 0; k < n; k++) {
                     char *A_ik = A + i * A_row_stride + k * A_col_stride;
@@ -385,32 +533,26 @@ naive_matmul_strided_loop(PyArrayMethod_Context *context, char *const data[],
     return 0;
 }
 
-int
-init_matmul_ops(PyObject *numpy)
+static int
+register_matmul_like_ufunc(PyObject *numpy, const char *ufunc_name, const char *spec_name,
+                           PyArrayMethod_StridedLoop *aligned_loop,
+                           PyArrayMethod_StridedLoop *unaligned_loop)
 {
-    PyObject *ufunc = PyObject_GetAttrString(numpy, "matmul");
+    PyObject *ufunc = PyObject_GetAttrString(numpy, ufunc_name);
     if (ufunc == NULL) {
         return -1;
     }
 
     PyArray_DTypeMeta *dtypes[3] = {&QuadPrecDType, &QuadPrecDType, &QuadPrecDType};
 
-#ifndef DISABLE_QUADBLAS
-
     PyType_Slot slots[] = {
             {NPY_METH_resolve_descriptors, (void *)&quad_matmul_resolve_descriptors},
-            {NPY_METH_strided_loop, (void *)&quad_matmul_strided_loop_aligned},
-            {NPY_METH_unaligned_strided_loop, (void *)&quad_matmul_strided_loop_unaligned},
+            {NPY_METH_strided_loop, (void *)aligned_loop},
+            {NPY_METH_unaligned_strided_loop, (void *)unaligned_loop},
             {0, NULL}};
-#else
-    PyType_Slot slots[] = {{NPY_METH_resolve_descriptors, (void *)&quad_matmul_resolve_descriptors},
-                           {NPY_METH_strided_loop, (void *)&naive_matmul_strided_loop},
-                           {NPY_METH_unaligned_strided_loop, (void *)&naive_matmul_strided_loop},
-                           {0, NULL}};
-#endif  // DISABLE_QUADBLAS
 
     PyArrayMethod_Spec Spec = {
-            .name = "quad_matmul_qblas",
+            .name = spec_name,
             .nin = 2,
             .nout = 1,
             .casting = NPY_NO_CASTING,
@@ -460,5 +602,35 @@ init_matmul_ops(PyObject *numpy)
     Py_DECREF(promoter_capsule);
     Py_DECREF(ufunc);
 
+    return 0;
+}
+
+int
+init_matmul_ops(PyObject *numpy)
+{
+#ifndef DISABLE_QUADBLAS
+    if (register_matmul_like_ufunc(numpy, "matmul", "quad_matmul_qblas",
+                                   &quad_matmul_strided_loop_aligned,
+                                   &quad_matmul_strided_loop_unaligned) < 0) {
+        return -1;
+    }
+    if (register_matmul_like_ufunc(numpy, "vecdot", "quad_vecdot_qblas",
+                                   &quad_vecdot_strided_loop_aligned,
+                                   &quad_vecdot_strided_loop_unaligned) < 0) {
+        return -1;
+    }
+#else
+    if (register_matmul_like_ufunc(numpy, "matmul", "quad_matmul_naive",
+                                   &naive_matmul_strided_loop,
+                                   &naive_matmul_strided_loop) < 0) {
+        return -1;
+    }
+    if (register_matmul_like_ufunc(numpy, "vecdot", "quad_vecdot_naive",
+                                   &naive_vecdot_strided_loop,
+                                   &naive_vecdot_strided_loop) < 0) {
+        return -1;
+    }
+#endif  // DISABLE_QUADBLAS
+
     return 0;
 }
\ No newline at end of file
diff --git a/tests/test_dot.py b/tests/test_dot.py
index d5845d5..316c404 100644
--- a/tests/test_dot.py
+++ b/tests/test_dot.py
@@ -1,6 +1,6 @@
 import pytest
 import numpy as np
-from utils import create_quad_array, assert_quad_equal, assert_quad_array_equal, arrays_equal_with_nan
+from utils import create_quad_array, assert_quad_equal, assert_quad_array_equal, arrays_equal_with_nan, _q, _qarr
 from numpy_quaddtype import QuadPrecision, QuadPrecDType
 
 
@@ -689,4 +689,132 @@ def test_dimension_mismatch_matrices(self):
         B = create_quad_array([1, 2, 3, 4, 5, 6], shape=(3, 2))  # Wrong size
         
         with pytest.raises(ValueError, match=r"matmul: Input operand 1 has a mismatch in its core dimension 0"):
-            np.matmul(A, B)
\ No newline at end of file
+            np.matmul(A, B)
+
+
+class TestVecdot:
+    """Tests for np.vecdot on QuadPrecision arrays."""
+
+    def test_simple(self):
+        x = create_quad_array([1, 2, 3])
+        y = create_quad_array([4, 5, 6])
+        result = np.vecdot(x, y)
+        assert isinstance(result, QuadPrecision)
+        assert_quad_equal(result, 32.0)
+
+    def test_orthogonal(self):
+        x = create_quad_array([1, 0, 0])
+        y = create_quad_array([0, 1, 0])
+        assert_quad_equal(np.vecdot(x, y), 0.0)
+
+    def test_self_dot(self):
+        x = create_quad_array([2, 3, 4])
+        assert_quad_equal(np.vecdot(x, x), 29.0)
+
+    @pytest.mark.parametrize("size", [1, 2, 5, 10, 50, 100])
+    def test_various_sizes(self, size):
+        x_vals = [i + 1 for i in range(size)]
+        y_vals = [2 * (i + 1) for i in range(size)]
+        x = create_quad_array(x_vals)
+        y = create_quad_array(y_vals)
+        result = np.vecdot(x, y)
+        expected = sum(x_vals[i] * y_vals[i] for i in range(size))
+        assert_quad_equal(result, expected)
+
+    def test_negative_and_fractional(self):
+        x = create_quad_array([1.5, -2.5, 3.25])
+        y = create_quad_array([-1.25, 2.75, -3.5])
+        expected = 1.5 * -1.25 + -2.5 * 2.75 + 3.25 * -3.5
+        assert_quad_equal(np.vecdot(x, y), expected)
+
+    def test_single_element(self):
+        x = create_quad_array([7.0])
+        y = create_quad_array([6.0])
+        result = np.vecdot(x, y)
+        assert isinstance(result, QuadPrecision)
+        assert_quad_equal(result, 42.0)
+
+    def test_batched_vectors(self):
+        """vecdot broadcasts over leading dimensions."""
+        x = _qarr([[1, 2, 3], [4, 5, 6]])
+        y = _qarr([[1, 1, 1], [2, 2, 2]])
+        result = np.vecdot(x, y)
+        assert result.shape == (2,)
+        assert_quad_equal(result[0], 6.0)
+        assert_quad_equal(result[1], 30.0)
+
+    def test_batched_3d(self):
+        x = _qarr([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])
+        y = _qarr([[[1, 1], [1, 1]], [[2, 2], [2, 2]]])
+        result = np.vecdot(x, y)
+        assert result.shape == (2, 2)
+        expected = [[3, 7], [22, 30]]
+        for i in range(2):
+            for j in range(2):
+                assert_quad_equal(result[i, j], expected[i][j])
+
+    def test_broadcast_against_scalar_vector(self):
+        """Broadcast a single vector against a stack."""
+        x = _qarr([[1, 2, 3], [4, 5, 6]])
+        y = _qarr([1, 1, 1])
+        result = np.vecdot(x, y)
+        assert result.shape == (2,)
+        assert_quad_equal(result[0], 6.0)
+        assert_quad_equal(result[1], 15.0)
+
+    @pytest.mark.parametrize("special_val", ["0.0", "-0.0", "inf", "-inf", "nan"])
+    def test_special_values(self, special_val):
+        x = create_quad_array([1.0, float(special_val), 2.0])
+        y = create_quad_array([3.0, 4.0, 5.0])
+        result = np.vecdot(x, y)
+        expected = np.vecdot(np.array([1.0, float(special_val), 2.0], dtype=np.float64),
+                             np.array([3.0, 4.0, 5.0], dtype=np.float64))
+        if np.isnan(expected):
+            assert np.isnan(float(result))
+        elif np.isinf(expected):
+            assert np.isinf(float(result))
+            assert np.sign(float(result)) == np.sign(expected)
+        else:
+            assert_quad_equal(result, expected)
+
+    def test_matches_matmul_for_1d(self):
+        """np.matmul of two 1D arrays is equivalent to vecdot."""
+        x = create_quad_array([1.5, 2.5, -3.0, 0.25])
+        y = create_quad_array([4.0, -1.0, 2.0, 8.0])
+        assert_quad_equal(np.vecdot(x, y), np.matmul(x, y))
+
+    def test_precision_advantage(self):
+        """vecdot accumulates with quad precision, beating float64 cancellation."""
+        x = create_quad_array([1e20, 1.0, -1e20])
+        y = create_quad_array([0.0, 1.0, 0.0])
+        assert_quad_equal(np.vecdot(x, y), 1.0, atol=1e-25)
+
+    def test_dimension_mismatch(self):
+        x = create_quad_array([1, 2])
+        y = create_quad_array([1, 2, 3])
+        with pytest.raises(ValueError):
+            np.vecdot(x, y)
+
+    @pytest.mark.parametrize("shape", [
+        (0,),       # 1-D empty → scalar 0
+        (3, 0),     # 3 batches, each empty inner → (3,) zeros
+        (0, 3),     # 0 batches, non-empty inner → empty (0,)
+    ], ids=lambda s: "x".join(str(d) for d in s))
+    def test_empty_vecdot_matches_float64(self, shape):
+        dtype = QuadPrecDType(backend='sleef')
+        x_q = np.zeros(shape, dtype=dtype)
+        y_q = np.zeros(shape, dtype=dtype)
+        x_f = np.zeros(shape, dtype=np.float64)
+        y_f = np.zeros(shape, dtype=np.float64)
+
+        R_q = np.vecdot(x_q, y_q)
+        R_f = np.vecdot(x_f, y_f)
+
+        # 1-D vecdot returns a 0-d scalar QuadPrecision (no .shape attr).
+        q_shape = () if isinstance(R_q, QuadPrecision) else R_q.shape
+        assert q_shape == R_f.shape, f"shape {q_shape} != float64 {R_f.shape}"
+        assert R_q.dtype == dtype
+
+        R_q_as_f = (np.float64(R_q) if isinstance(R_q, QuadPrecision)
+                    else np.asarray(R_q).astype(np.float64))
+        assert np.array_equal(R_q_as_f, R_f), f"values {R_q_as_f!r} != {R_f!r}"
diff --git a/tests/utils.py b/tests/utils.py
index dcdbc67..8a54246 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -95,4 +95,18 @@ def arrays_equal_with_nan(a, b, rtol=1e-15, atol=1e-15):
             except AssertionError:
                 return False
     
-    return True
\ No newline at end of file
+    return True
+
+
+def _q(v):
+    return QuadPrecision(str(float(v)), backend='sleef')
+
+
+def _qarr(values, shape=None):
+    """Build an n-D QuadPrecision array from a (possibly nested) Python sequence."""
+    dt = QuadPrecDType(backend='sleef')
+    arr = np.asarray(values, dtype=np.float64)
+    if shape is not None:
+        arr = arr.reshape(shape)
+    flat = [_q(v) for v in arr.ravel().tolist()]
+    return np.array(flat, dtype=dt).reshape(arr.shape)