From 135602f7d96df3ce64559b411050ed0ca21cdb95 Mon Sep 17 00:00:00 2001 From: hass-nation Date: Mon, 29 Jun 2026 20:41:51 +0100 Subject: [PATCH] Add Pesaran CD panel dependence diagnostic Add a cached pesaran_cd result property for panel estimators, backed by idiosyncratic residual correlations and a standard-normal test container, with balanced and unbalanced coverage in the panel and shared hypothesis tests. Implements the diagnostic in Pesaran, M. H. (2021), "General diagnostic tests for cross-sectional dependence in panels", Empirical Economics, 60(1), 13-50. Co-Authored-By: OpenAI Codex --- linearmodels/panel/results.py | 73 +++++++++++++++++++- linearmodels/shared/hypotheses.py | 84 ++++++++++++++++++++++- linearmodels/tests/panel/test_results.py | 49 +++++++++++++ linearmodels/tests/shared/test_utility.py | 14 ++++ 4 files changed, 218 insertions(+), 2 deletions(-) diff --git a/linearmodels/panel/results.py b/linearmodels/panel/results.py index 5617605c89..fa89b369a7 100644 --- a/linearmodels/panel/results.py +++ b/linearmodels/panel/results.py @@ -15,7 +15,12 @@ from linearmodels.iv.results import default_txt_fmt, stub_concat, table_concat from linearmodels.shared.base import _ModelComparison, _SummaryStr -from linearmodels.shared.hypotheses import WaldTestStatistic, quadratic_form_test +from linearmodels.shared.hypotheses import ( + InapplicableTestStatistic, + NormalTestStatistic, + WaldTestStatistic, + quadratic_form_test, +) from linearmodels.shared.io import _str, add_star, pval_format from linearmodels.shared.utility import AttrDict import linearmodels.typing.data @@ -595,6 +600,72 @@ def wresids(self) -> Series: self._wresids.squeeze(), index=self._index, name="weighted residual" ) + @cached_property + def pesaran_cd(self) -> InapplicableTestStatistic | NormalTestStatistic: + r""" + Pesaran CD test of residual cross-sectional dependence. + + Returns + ------- + NormalTestStatistic + Statistic value, distribution and p-value. + + Notes + ----- + Tests the null hypothesis that the model's idiosyncratic shocks are + cross-sectionally independent. Let :math:`\hat{\rho}_{ij}` denote the + sample correlation of the estimated idiosyncratic shocks for entities + :math:`i` and :math:`j`, computed using their common time observations, + and let :math:`T_{ij}` denote the number of overlapping observations. + The statistic is + + .. math:: + + CD = \frac{1}{\sqrt{M}}\sum_{i= 2 + selector &= np.isfinite(corr) + npairs = int(selector.sum()) + if npairs == 0: + return InapplicableTestStatistic( + reason=( + "Pesaran CD test requires at least one pair of entities " + "with two or more overlapping observations." + ), + name="Pesaran CD test", + ) + + stat = float( + np.sum(np.sqrt(counts[selector].astype(float)) * corr[selector]) + / np.sqrt(npairs) + ) + null = "Idiosyncratic shocks are cross-sectionally independent" + return NormalTestStatistic(stat, null, name="Pesaran CD test") + @property def f_statistic_robust(self) -> WaldTestStatistic: r""" diff --git a/linearmodels/shared/hypotheses.py b/linearmodels/shared/hypotheses.py index ddf3dc35cd..796b046bd9 100644 --- a/linearmodels/shared/hypotheses.py +++ b/linearmodels/shared/hypotheses.py @@ -5,7 +5,7 @@ from formulaic.utils.constraints import LinearConstraints import numpy as np from pandas import Series -from scipy.stats import chi2, f +from scipy.stats import chi2, f, norm import linearmodels.typing.data @@ -97,6 +97,88 @@ def __repr__(self) -> str: ) +class NormalTestStatistic: + """ + Test statistic holder for tests with an asymptotic standard normal law. + + Parameters + ---------- + stat : float + The test statistic. + null : str + A statement of the test's null hypothesis. + name : str + Name of test. + two_sided : bool + Flag indicating whether p-values and critical values are computed + using a two-sided test. + """ + + def __init__( + self, + stat: float, + null: str, + *, + name: str | None = None, + two_sided: bool = True, + ) -> None: + self._stat = stat + self._null = null + self._name = name + self._two_sided = two_sided + self.dist = norm() + self.dist_name = "N(0,1)" + + @property + def stat(self) -> float: + """Test statistic""" + return self._stat + + @property + def pval(self) -> float: + """P-value of test statistic""" + if self._two_sided: + return 2 * (1 - self.dist.cdf(abs(self.stat))) + return 1 - self.dist.cdf(self.stat) + + @property + def critical_values(self) -> dict[str, float]: + """Critical values for common test sizes""" + if self._two_sided: + quantiles = [0.95, 0.975, 0.995] + else: + quantiles = [0.9, 0.95, 0.99] + return dict( + zip(["10%", "5%", "1%"], self.dist.ppf(quantiles), strict=False) + ) + + @property + def null(self) -> str: + """Null hypothesis""" + return self._null + + def __str__(self) -> str: + name = "" + if self._name is not None: + name = self._name + "\n" + msg = ( + "{name}H0: {null}\nStatistic: {stat:0.4f}\n" + "P-value: {pval:0.4f}\nDistributed: {dist}" + ) + return msg.format( + name=name, + null=self.null, + stat=self.stat, + pval=self.pval, + dist=self.dist_name, + ) + + def __repr__(self) -> str: + return ( + self.__str__() + "\n" + self.__class__.__name__ + f", id: {hex(id(self))}" + ) + + class InvalidTestWarning(UserWarning): pass diff --git a/linearmodels/tests/panel/test_results.py b/linearmodels/tests/panel/test_results.py index 620d1a3718..e8b9e3a92d 100644 --- a/linearmodels/tests/panel/test_results.py +++ b/linearmodels/tests/panel/test_results.py @@ -2,8 +2,10 @@ import numpy as np from numpy.testing import assert_allclose +import pandas as pd from pandas.testing import assert_series_equal import pytest +from scipy import stats from statsmodels.tools.tools import add_constant from linearmodels.datasets import wage_panel @@ -11,6 +13,10 @@ from linearmodels.panel.data import PanelData from linearmodels.panel.model import PanelOLS, PooledOLS, RandomEffects from linearmodels.panel.results import compare +from linearmodels.shared.hypotheses import ( + InapplicableTestStatistic, + NormalTestStatistic, +) from linearmodels.tests.panel._utility import datatypes, generate_data @@ -25,6 +31,20 @@ def data(request): ids = ["-".join(str(param) for param in perm) for perm in perms] +def direct_pesaran_cd(idiosyncratic): + wide = idiosyncratic.iloc[:, 0].unstack(level=0) + wide = wide.loc[:, wide.notnull().any(axis=0)] + corr = wide.corr(min_periods=2).to_numpy(dtype=float) + counts = wide.notnull().to_numpy(dtype=np.int64) + overlaps = counts.T @ counts + selector = np.tril(np.ones_like(overlaps, dtype=bool), k=-1) + selector &= overlaps >= 2 + selector &= np.isfinite(corr) + npairs = int(selector.sum()) + stat = np.sum(np.sqrt(overlaps[selector].astype(float)) * corr[selector]) + return stat / np.sqrt(npairs) + + @pytest.fixture(params=perms, ids=ids) def generated_data(request): missing, datatype, const = request.param @@ -91,6 +111,35 @@ def test_multiple_no_effects(data): compare({"a": res, "model2": res3, "model3": res4}) +def test_pesaran_cd(data): + dependent = data.set_index(["nr", "year"]).lwage + exog = add_constant(data.set_index(["nr", "year"])[["expersq", "married", "union"]]) + res = PanelOLS(dependent, exog, entity_effects=True).fit() + cd = res.pesaran_cd + assert isinstance(cd, NormalTestStatistic) + direct = direct_pesaran_cd(res.idiosyncratic) + assert_allclose(cd.stat, direct) + assert_allclose(cd.pval, 2 * (1 - stats.norm.cdf(abs(direct)))) + + +def test_pesaran_cd_unbalanced(): + data = generate_data(0.2, "pandas", const=True, ntk=(40, 8, 3)) + res = PanelOLS(data.y, data.x, entity_effects=True).fit() + cd = res.pesaran_cd + assert isinstance(cd, NormalTestStatistic) + assert_allclose(cd.stat, direct_pesaran_cd(res.idiosyncratic)) + + +def test_pesaran_cd_inapplicable(): + index = pd.MultiIndex.from_product([["firm0"], range(8)], names=["firm", "time"]) + y = pd.Series(np.linspace(0.0, 1.0, 8), index=index, name="y") + x = pd.DataFrame({"const": 1.0, "x1": np.linspace(-1.0, 1.0, 8)}, index=index) + res = PooledOLS(y, x).fit() + cd = res.pesaran_cd + assert isinstance(cd, InapplicableTestStatistic) + assert np.isnan(cd.pval) + + def test_incorrect_type(data): dependent = data.set_index(["nr", "year"]).lwage exog = add_constant(data.set_index(["nr", "year"])[["expersq", "married", "union"]]) diff --git a/linearmodels/tests/shared/test_utility.py b/linearmodels/tests/shared/test_utility.py index 4f2f5b5774..a2ac2a5f14 100644 --- a/linearmodels/tests/shared/test_utility.py +++ b/linearmodels/tests/shared/test_utility.py @@ -16,6 +16,7 @@ from linearmodels.shared.hypotheses import ( InapplicableTestStatistic, InvalidTestStatistic, + NormalTestStatistic, WaldTestStatistic, ) from linearmodels.shared.io import add_star, format_wide @@ -101,6 +102,19 @@ def test_inapplicable_test_statistic(): assert "not applicable" in str(ts) +def test_normal_test_statistic(): + ts = NormalTestStatistic(1.5, "_NULL_", name="_NAME_") + assert str(hex(id(ts))) in ts.__repr__() + assert "_NULL_" in str(ts) + assert ts.stat == 1.5 + assert ts.dist_name == "N(0,1)" + assert_allclose(2 * (1 - stats.norm.cdf(1.5)), ts.pval) + assert isinstance(ts.critical_values, dict) + + ts = NormalTestStatistic(1.5, "_NULL_", name="_NAME_", two_sided=False) + assert_allclose(1 - stats.norm.cdf(1.5), ts.pval) + + def test_inv_sqrth(): x = np.random.randn(1000, 10) xpx = x.T @ x