From 135602f7d96df3ce64559b411050ed0ca21cdb95 Mon Sep 17 00:00:00 2001
From: hass-nation <he3190@gmail.com>
Date: Mon, 29 Jun 2026 20:41:51 +0100
Subject: [PATCH] Add Pesaran CD panel dependence diagnostic

Add a cached pesaran_cd result property for panel estimators, backed by idiosyncratic residual correlations and a standard-normal test container, with balanced and unbalanced coverage in the panel and shared hypothesis tests. Implements the diagnostic in Pesaran, M. H. (2021), "General diagnostic tests for cross-sectional dependence in panels", Empirical Economics, 60(1), 13-50.

Co-Authored-By: OpenAI Codex <codex@openai.com>
---
 linearmodels/panel/results.py             | 73 +++++++++++++++++++-
 linearmodels/shared/hypotheses.py         | 84 ++++++++++++++++++++++-
 linearmodels/tests/panel/test_results.py  | 49 +++++++++++++
 linearmodels/tests/shared/test_utility.py | 14 ++++
 4 files changed, 218 insertions(+), 2 deletions(-)

diff --git a/linearmodels/panel/results.py b/linearmodels/panel/results.py
index 5617605c89..fa89b369a7 100644
--- a/linearmodels/panel/results.py
+++ b/linearmodels/panel/results.py
@@ -15,7 +15,12 @@
 
 from linearmodels.iv.results import default_txt_fmt, stub_concat, table_concat
 from linearmodels.shared.base import _ModelComparison, _SummaryStr
-from linearmodels.shared.hypotheses import WaldTestStatistic, quadratic_form_test
+from linearmodels.shared.hypotheses import (
+    InapplicableTestStatistic,
+    NormalTestStatistic,
+    WaldTestStatistic,
+    quadratic_form_test,
+)
 from linearmodels.shared.io import _str, add_star, pval_format
 from linearmodels.shared.utility import AttrDict
 import linearmodels.typing.data
@@ -595,6 +600,72 @@ def wresids(self) -> Series:
             self._wresids.squeeze(), index=self._index, name="weighted residual"
         )
 
+    @cached_property
+    def pesaran_cd(self) -> InapplicableTestStatistic | NormalTestStatistic:
+        r"""
+        Pesaran CD test of residual cross-sectional dependence.
+
+        Returns
+        -------
+        NormalTestStatistic
+            Statistic value, distribution and p-value.
+
+        Notes
+        -----
+        Tests the null hypothesis that the model's idiosyncratic shocks are
+        cross-sectionally independent. Let :math:`\hat{\rho}_{ij}` denote the
+        sample correlation of the estimated idiosyncratic shocks for entities
+        :math:`i` and :math:`j`, computed using their common time observations,
+        and let :math:`T_{ij}` denote the number of overlapping observations.
+        The statistic is
+
+        .. math::
+
+           CD = \frac{1}{\sqrt{M}}\sum_{i<j}\sqrt{T_{ij}}\hat{\rho}_{ij},
+
+        where :math:`M` is the number of entity pairs with at least two
+        overlapping observations and finite pairwise correlation estimates.
+        Under the null, the statistic is asymptotically standard normal.
+
+        References
+        ----------
+        Pesaran, M. H. (2021). General diagnostic tests for cross-sectional
+        dependence in panels. Empirical Economics, 60(1), 13-50.
+        """
+        eps = self.idiosyncratic.iloc[:, 0]
+        wide = eps.unstack(level=0)
+        wide = wide.loc[:, wide.notnull().any(axis=0)]
+        nentity = wide.shape[1]
+        if nentity < 2:
+            return InapplicableTestStatistic(
+                reason="Pesaran CD test requires at least two entities.",
+                name="Pesaran CD test",
+            )
+
+        corr = wide.corr(min_periods=2).to_numpy(dtype=float)
+        counts = wide.notnull().to_numpy(dtype=np.int64).T @ wide.notnull().to_numpy(
+            dtype=np.int64
+        )
+        selector = np.tril(np.ones_like(counts, dtype=bool), k=-1)
+        selector &= counts >= 2
+        selector &= np.isfinite(corr)
+        npairs = int(selector.sum())
+        if npairs == 0:
+            return InapplicableTestStatistic(
+                reason=(
+                    "Pesaran CD test requires at least one pair of entities "
+                    "with two or more overlapping observations."
+                ),
+                name="Pesaran CD test",
+            )
+
+        stat = float(
+            np.sum(np.sqrt(counts[selector].astype(float)) * corr[selector])
+            / np.sqrt(npairs)
+        )
+        null = "Idiosyncratic shocks are cross-sectionally independent"
+        return NormalTestStatistic(stat, null, name="Pesaran CD test")
+
     @property
     def f_statistic_robust(self) -> WaldTestStatistic:
         r"""
diff --git a/linearmodels/shared/hypotheses.py b/linearmodels/shared/hypotheses.py
index ddf3dc35cd..796b046bd9 100644
--- a/linearmodels/shared/hypotheses.py
+++ b/linearmodels/shared/hypotheses.py
@@ -5,7 +5,7 @@
 from formulaic.utils.constraints import LinearConstraints
 import numpy as np
 from pandas import Series
-from scipy.stats import chi2, f
+from scipy.stats import chi2, f, norm
 
 import linearmodels.typing.data
 
@@ -97,6 +97,88 @@ def __repr__(self) -> str:
         )
 
 
+class NormalTestStatistic:
+    """
+    Test statistic holder for tests with an asymptotic standard normal law.
+
+    Parameters
+    ----------
+    stat : float
+        The test statistic.
+    null : str
+        A statement of the test's null hypothesis.
+    name : str
+        Name of test.
+    two_sided : bool
+        Flag indicating whether p-values and critical values are computed
+        using a two-sided test.
+    """
+
+    def __init__(
+        self,
+        stat: float,
+        null: str,
+        *,
+        name: str | None = None,
+        two_sided: bool = True,
+    ) -> None:
+        self._stat = stat
+        self._null = null
+        self._name = name
+        self._two_sided = two_sided
+        self.dist = norm()
+        self.dist_name = "N(0,1)"
+
+    @property
+    def stat(self) -> float:
+        """Test statistic"""
+        return self._stat
+
+    @property
+    def pval(self) -> float:
+        """P-value of test statistic"""
+        if self._two_sided:
+            return 2 * (1 - self.dist.cdf(abs(self.stat)))
+        return 1 - self.dist.cdf(self.stat)
+
+    @property
+    def critical_values(self) -> dict[str, float]:
+        """Critical values for common test sizes"""
+        if self._two_sided:
+            quantiles = [0.95, 0.975, 0.995]
+        else:
+            quantiles = [0.9, 0.95, 0.99]
+        return dict(
+            zip(["10%", "5%", "1%"], self.dist.ppf(quantiles), strict=False)
+        )
+
+    @property
+    def null(self) -> str:
+        """Null hypothesis"""
+        return self._null
+
+    def __str__(self) -> str:
+        name = ""
+        if self._name is not None:
+            name = self._name + "\n"
+        msg = (
+            "{name}H0: {null}\nStatistic: {stat:0.4f}\n"
+            "P-value: {pval:0.4f}\nDistributed: {dist}"
+        )
+        return msg.format(
+            name=name,
+            null=self.null,
+            stat=self.stat,
+            pval=self.pval,
+            dist=self.dist_name,
+        )
+
+    def __repr__(self) -> str:
+        return (
+            self.__str__() + "\n" + self.__class__.__name__ + f", id: {hex(id(self))}"
+        )
+
+
 class InvalidTestWarning(UserWarning):
     pass
 
diff --git a/linearmodels/tests/panel/test_results.py b/linearmodels/tests/panel/test_results.py
index 620d1a3718..e8b9e3a92d 100644
--- a/linearmodels/tests/panel/test_results.py
+++ b/linearmodels/tests/panel/test_results.py
@@ -2,8 +2,10 @@
 
 import numpy as np
 from numpy.testing import assert_allclose
+import pandas as pd
 from pandas.testing import assert_series_equal
 import pytest
+from scipy import stats
 from statsmodels.tools.tools import add_constant
 
 from linearmodels.datasets import wage_panel
@@ -11,6 +13,10 @@
 from linearmodels.panel.data import PanelData
 from linearmodels.panel.model import PanelOLS, PooledOLS, RandomEffects
 from linearmodels.panel.results import compare
+from linearmodels.shared.hypotheses import (
+    InapplicableTestStatistic,
+    NormalTestStatistic,
+)
 from linearmodels.tests.panel._utility import datatypes, generate_data
 
 
@@ -25,6 +31,20 @@ def data(request):
 ids = ["-".join(str(param) for param in perm) for perm in perms]
 
 
+def direct_pesaran_cd(idiosyncratic):
+    wide = idiosyncratic.iloc[:, 0].unstack(level=0)
+    wide = wide.loc[:, wide.notnull().any(axis=0)]
+    corr = wide.corr(min_periods=2).to_numpy(dtype=float)
+    counts = wide.notnull().to_numpy(dtype=np.int64)
+    overlaps = counts.T @ counts
+    selector = np.tril(np.ones_like(overlaps, dtype=bool), k=-1)
+    selector &= overlaps >= 2
+    selector &= np.isfinite(corr)
+    npairs = int(selector.sum())
+    stat = np.sum(np.sqrt(overlaps[selector].astype(float)) * corr[selector])
+    return stat / np.sqrt(npairs)
+
+
 @pytest.fixture(params=perms, ids=ids)
 def generated_data(request):
     missing, datatype, const = request.param
@@ -91,6 +111,35 @@ def test_multiple_no_effects(data):
     compare({"a": res, "model2": res3, "model3": res4})
 
 
+def test_pesaran_cd(data):
+    dependent = data.set_index(["nr", "year"]).lwage
+    exog = add_constant(data.set_index(["nr", "year"])[["expersq", "married", "union"]])
+    res = PanelOLS(dependent, exog, entity_effects=True).fit()
+    cd = res.pesaran_cd
+    assert isinstance(cd, NormalTestStatistic)
+    direct = direct_pesaran_cd(res.idiosyncratic)
+    assert_allclose(cd.stat, direct)
+    assert_allclose(cd.pval, 2 * (1 - stats.norm.cdf(abs(direct))))
+
+
+def test_pesaran_cd_unbalanced():
+    data = generate_data(0.2, "pandas", const=True, ntk=(40, 8, 3))
+    res = PanelOLS(data.y, data.x, entity_effects=True).fit()
+    cd = res.pesaran_cd
+    assert isinstance(cd, NormalTestStatistic)
+    assert_allclose(cd.stat, direct_pesaran_cd(res.idiosyncratic))
+
+
+def test_pesaran_cd_inapplicable():
+    index = pd.MultiIndex.from_product([["firm0"], range(8)], names=["firm", "time"])
+    y = pd.Series(np.linspace(0.0, 1.0, 8), index=index, name="y")
+    x = pd.DataFrame({"const": 1.0, "x1": np.linspace(-1.0, 1.0, 8)}, index=index)
+    res = PooledOLS(y, x).fit()
+    cd = res.pesaran_cd
+    assert isinstance(cd, InapplicableTestStatistic)
+    assert np.isnan(cd.pval)
+
+
 def test_incorrect_type(data):
     dependent = data.set_index(["nr", "year"]).lwage
     exog = add_constant(data.set_index(["nr", "year"])[["expersq", "married", "union"]])
diff --git a/linearmodels/tests/shared/test_utility.py b/linearmodels/tests/shared/test_utility.py
index 4f2f5b5774..a2ac2a5f14 100644
--- a/linearmodels/tests/shared/test_utility.py
+++ b/linearmodels/tests/shared/test_utility.py
@@ -16,6 +16,7 @@
 from linearmodels.shared.hypotheses import (
     InapplicableTestStatistic,
     InvalidTestStatistic,
+    NormalTestStatistic,
     WaldTestStatistic,
 )
 from linearmodels.shared.io import add_star, format_wide
@@ -101,6 +102,19 @@ def test_inapplicable_test_statistic():
     assert "not applicable" in str(ts)
 
 
+def test_normal_test_statistic():
+    ts = NormalTestStatistic(1.5, "_NULL_", name="_NAME_")
+    assert str(hex(id(ts))) in ts.__repr__()
+    assert "_NULL_" in str(ts)
+    assert ts.stat == 1.5
+    assert ts.dist_name == "N(0,1)"
+    assert_allclose(2 * (1 - stats.norm.cdf(1.5)), ts.pval)
+    assert isinstance(ts.critical_values, dict)
+
+    ts = NormalTestStatistic(1.5, "_NULL_", name="_NAME_", two_sided=False)
+    assert_allclose(1 - stats.norm.cdf(1.5), ts.pval)
+
+
 def test_inv_sqrth():
     x = np.random.randn(1000, 10)
     xpx = x.T @ x