Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 72 additions & 1 deletion linearmodels/panel/results.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,12 @@

from linearmodels.iv.results import default_txt_fmt, stub_concat, table_concat
from linearmodels.shared.base import _ModelComparison, _SummaryStr
from linearmodels.shared.hypotheses import WaldTestStatistic, quadratic_form_test
from linearmodels.shared.hypotheses import (
InapplicableTestStatistic,
NormalTestStatistic,
WaldTestStatistic,
quadratic_form_test,
)
from linearmodels.shared.io import _str, add_star, pval_format
from linearmodels.shared.utility import AttrDict
import linearmodels.typing.data
Expand Down Expand Up @@ -595,6 +600,72 @@ def wresids(self) -> Series:
self._wresids.squeeze(), index=self._index, name="weighted residual"
)

@cached_property
def pesaran_cd(self) -> InapplicableTestStatistic | NormalTestStatistic:
    r"""
    Pesaran CD test of residual cross-sectional dependence.

    Returns
    -------
    InapplicableTestStatistic or NormalTestStatistic
        A ``NormalTestStatistic`` holding the statistic value, distribution
        and p-value. An ``InapplicableTestStatistic`` is returned instead
        when fewer than two entities have observations, or when no pair of
        entities has two or more overlapping observations together with a
        finite correlation estimate.

    Notes
    -----
    Tests the null hypothesis that the model's idiosyncratic shocks are
    cross-sectionally independent. Let :math:`\hat{\rho}_{ij}` denote the
    sample correlation of the estimated idiosyncratic shocks for entities
    :math:`i` and :math:`j`, computed using their common time observations,
    and let :math:`T_{ij}` denote the number of overlapping observations.
    The statistic is

    .. math::

        CD = \frac{1}{\sqrt{M}}\sum_{i<j}\sqrt{T_{ij}}\hat{\rho}_{ij},

    where :math:`M` is the number of entity pairs with at least two
    overlapping observations and finite pairwise correlation estimates.
    Under the null, the statistic is asymptotically standard normal.

    References
    ----------
    Pesaran, M. H. (2021). General diagnostic tests for cross-sectional
    dependence in panels. Empirical Economics, 60(1), 13-50.
    """
    shocks = self.idiosyncratic.iloc[:, 0]
    # Move index level 0 into the columns so that each column holds the
    # series of a single entity, then drop columns with no observations.
    panel = shocks.unstack(level=0)
    panel = panel.loc[:, panel.notnull().any(axis=0)]
    if panel.shape[1] < 2:
        return InapplicableTestStatistic(
            reason="Pesaran CD test requires at least two entities.",
            name="Pesaran CD test",
        )

    # Pairwise correlations over each pair's jointly observed rows.
    rho = panel.corr(min_periods=2).to_numpy(dtype=float)
    # observed.T @ observed counts, for every pair of columns, the rows in
    # which both columns are non-missing (T_ij in the docstring).
    observed = panel.notnull().to_numpy(dtype=np.int64)
    overlap = observed.T @ observed

    # Strict lower triangle: each unordered pair once, no self-pairs.
    below_diagonal = np.tril(np.ones_like(overlap, dtype=bool), k=-1)
    usable = below_diagonal & (overlap >= 2) & np.isfinite(rho)
    npairs = int(usable.sum())
    if npairs == 0:
        return InapplicableTestStatistic(
            reason=(
                "Pesaran CD test requires at least one pair of entities "
                "with two or more overlapping observations."
            ),
            name="Pesaran CD test",
        )

    weights = np.sqrt(overlap[usable].astype(float))
    weighted_sum = np.sum(weights * rho[usable])
    stat = float(weighted_sum / np.sqrt(npairs))
    null = "Idiosyncratic shocks are cross-sectionally independent"
    return NormalTestStatistic(stat, null, name="Pesaran CD test")

@property
def f_statistic_robust(self) -> WaldTestStatistic:
r"""
Expand Down
84 changes: 83 additions & 1 deletion linearmodels/shared/hypotheses.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from formulaic.utils.constraints import LinearConstraints
import numpy as np
from pandas import Series
from scipy.stats import chi2, f
from scipy.stats import chi2, f, norm

import linearmodels.typing.data

Expand Down Expand Up @@ -97,6 +97,88 @@ def __repr__(self) -> str:
)


class NormalTestStatistic:
"""
Test statistic holder for tests with an asymptotic standard normal law.

Parameters
----------
stat : float
The test statistic.
null : str
A statement of the test's null hypothesis.
name : str
Name of test.
two_sided : bool
Flag indicating whether p-values and critical values are computed
using a two-sided test.
"""

def __init__(
self,
stat: float,
null: str,
*,
name: str | None = None,
two_sided: bool = True,
) -> None:
self._stat = stat
self._null = null
self._name = name
self._two_sided = two_sided
self.dist = norm()
self.dist_name = "N(0,1)"

@property
def stat(self) -> float:
"""Test statistic"""
return self._stat

@property
def pval(self) -> float:
"""P-value of test statistic"""
if self._two_sided:
return 2 * (1 - self.dist.cdf(abs(self.stat)))
return 1 - self.dist.cdf(self.stat)

@property
def critical_values(self) -> dict[str, float]:
"""Critical values for common test sizes"""
if self._two_sided:
quantiles = [0.95, 0.975, 0.995]
else:
quantiles = [0.9, 0.95, 0.99]
return dict(
zip(["10%", "5%", "1%"], self.dist.ppf(quantiles), strict=False)
)

@property
def null(self) -> str:
"""Null hypothesis"""
return self._null

def __str__(self) -> str:
name = ""
if self._name is not None:
name = self._name + "\n"
msg = (
"{name}H0: {null}\nStatistic: {stat:0.4f}\n"
"P-value: {pval:0.4f}\nDistributed: {dist}"
)
return msg.format(
name=name,
null=self.null,
stat=self.stat,
pval=self.pval,
dist=self.dist_name,
)

def __repr__(self) -> str:
return (
self.__str__() + "\n" + self.__class__.__name__ + f", id: {hex(id(self))}"
)


class InvalidTestWarning(UserWarning):
    """Warning category derived from ``UserWarning``.

    NOTE(review): its call sites are not visible here; presumably it is
    issued when a requested test is not valid -- confirm against callers.
    """

Expand Down
49 changes: 49 additions & 0 deletions linearmodels/tests/panel/test_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,21 @@

import numpy as np
from numpy.testing import assert_allclose
import pandas as pd
from pandas.testing import assert_series_equal
import pytest
from scipy import stats
from statsmodels.tools.tools import add_constant

from linearmodels.datasets import wage_panel
from linearmodels.iv.model import IV2SLS
from linearmodels.panel.data import PanelData
from linearmodels.panel.model import PanelOLS, PooledOLS, RandomEffects
from linearmodels.panel.results import compare
from linearmodels.shared.hypotheses import (
InapplicableTestStatistic,
NormalTestStatistic,
)
from linearmodels.tests.panel._utility import datatypes, generate_data


Expand All @@ -25,6 +31,20 @@ def data(request):
ids = ["-".join(str(param) for param in perm) for perm in perms]


def direct_pesaran_cd(idiosyncratic):
    """
    Reference computation of the Pesaran CD statistic used by the tests.

    Takes the first column of ``idiosyncratic``, moves index level 0 into
    the columns, and returns the sum over column pairs of
    ``sqrt(overlap) * correlation`` divided by the square root of the number
    of pairs used. A pair is used when it has at least two jointly observed
    rows and a finite correlation.
    """
    panel = idiosyncratic.iloc[:, 0].unstack(level=0)
    # Drop columns that have no observations at all.
    has_data = panel.notnull().any(axis=0)
    panel = panel.loc[:, has_data]

    rho = panel.corr(min_periods=2).to_numpy(dtype=float)
    observed = panel.notnull().to_numpy(dtype=np.int64)
    # Entry (i, j) is the number of rows where columns i and j are both present.
    overlaps = observed.T @ observed

    # Strict lower triangle so every unordered pair is counted once.
    use = np.tril(np.ones_like(overlaps, dtype=bool), k=-1)
    use = use & (overlaps >= 2) & np.isfinite(rho)
    npairs = int(use.sum())

    weights = np.sqrt(overlaps[use].astype(float))
    total = np.sum(weights * rho[use])
    return total / np.sqrt(npairs)


@pytest.fixture(params=perms, ids=ids)
def generated_data(request):
missing, datatype, const = request.param
Expand Down Expand Up @@ -91,6 +111,35 @@ def test_multiple_no_effects(data):
compare({"a": res, "model2": res3, "model3": res4})


def test_pesaran_cd(data):
    """Check ``pesaran_cd`` on a PanelOLS fit against the direct computation."""
    indexed = data.set_index(["nr", "year"])
    dependent = indexed.lwage
    exog = add_constant(indexed[["expersq", "married", "union"]])
    res = PanelOLS(dependent, exog, entity_effects=True).fit()
    cd = res.pesaran_cd
    assert isinstance(cd, NormalTestStatistic)
    direct = direct_pesaran_cd(res.idiosyncratic)
    assert_allclose(cd.stat, direct)
    # Compare against the survival function with a small absolute tolerance:
    # for a statistic far in the tail, ``1 - cdf`` is pure rounding noise (or
    # exactly 0), so a purely relative comparison would depend on how the
    # p-value happens to be computed rather than on its value.
    assert_allclose(cd.pval, 2 * stats.norm.sf(abs(direct)), atol=1e-12)


def test_pesaran_cd_unbalanced():
    """Check ``pesaran_cd`` against the direct computation on generated data.

    NOTE(review): the first ``generate_data`` argument (0.2) is presumably
    the share of missing observations that makes the panel unbalanced --
    confirm against ``generate_data``.
    """
    panel = generate_data(0.2, "pandas", const=True, ntk=(40, 8, 3))
    fitted = PanelOLS(panel.y, panel.x, entity_effects=True).fit()
    result = fitted.pesaran_cd
    assert isinstance(result, NormalTestStatistic)
    expected = direct_pesaran_cd(fitted.idiosyncratic)
    assert_allclose(result.stat, expected)


def test_pesaran_cd_inapplicable():
    """A panel with a single entity yields an inapplicable statistic with NaN p-value."""
    nobs = 8
    panel_index = pd.MultiIndex.from_product(
        [["firm0"], range(nobs)], names=["firm", "time"]
    )
    y = pd.Series(np.linspace(0.0, 1.0, nobs), index=panel_index, name="y")
    x = pd.DataFrame(
        {"const": 1.0, "x1": np.linspace(-1.0, 1.0, nobs)}, index=panel_index
    )
    fitted = PooledOLS(y, x).fit()
    result = fitted.pesaran_cd
    assert isinstance(result, InapplicableTestStatistic)
    assert np.isnan(result.pval)


def test_incorrect_type(data):
dependent = data.set_index(["nr", "year"]).lwage
exog = add_constant(data.set_index(["nr", "year"])[["expersq", "married", "union"]])
Expand Down
14 changes: 14 additions & 0 deletions linearmodels/tests/shared/test_utility.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from linearmodels.shared.hypotheses import (
InapplicableTestStatistic,
InvalidTestStatistic,
NormalTestStatistic,
WaldTestStatistic,
)
from linearmodels.shared.io import add_star, format_wide
Expand Down Expand Up @@ -101,6 +102,19 @@ def test_inapplicable_test_statistic():
assert "not applicable" in str(ts)


def test_normal_test_statistic():
    """Exercise the repr, str, stat, dist_name, pval and critical_values of NormalTestStatistic."""
    value = 1.5
    upper_tail = 1 - stats.norm.cdf(value)

    two_sided = NormalTestStatistic(value, "_NULL_", name="_NAME_")
    assert hex(id(two_sided)) in repr(two_sided)
    assert "_NULL_" in str(two_sided)
    assert two_sided.stat == value
    assert two_sided.dist_name == "N(0,1)"
    assert_allclose(2 * upper_tail, two_sided.pval)
    assert isinstance(two_sided.critical_values, dict)

    one_sided = NormalTestStatistic(value, "_NULL_", name="_NAME_", two_sided=False)
    assert_allclose(upper_tail, one_sided.pval)


def test_inv_sqrth():
x = np.random.randn(1000, 10)
xpx = x.T @ x
Expand Down