diff --git a/linearmodels/iv/__init__.py b/linearmodels/iv/__init__.py index 8b6ddfaa99..d7e9270f14 100644 --- a/linearmodels/iv/__init__.py +++ b/linearmodels/iv/__init__.py @@ -1,11 +1,12 @@ from .absorbing import AbsorbingLS, Interaction -from .model import IV2SLS, IVGMM, IVGMMCUE, IVLIML +from .model import IV2SLS, IVGMM, IVGMMCUE, IVJIVE, IVLIML from .results import compare __all__ = [ "IV2SLS", "IVGMM", "IVGMMCUE", + "IVJIVE", "IVLIML", "AbsorbingLS", "Interaction", diff --git a/linearmodels/iv/model.py b/linearmodels/iv/model.py index 41cd06d049..3f79775a1a 100644 --- a/linearmodels/iv/model.py +++ b/linearmodels/iv/model.py @@ -1759,3 +1759,275 @@ def _gmm_model_from_formula( ) mod.formula = formula return mod + + +# --------------------------------------------------------------------------- +# Private covariance helper for JIVE +# --------------------------------------------------------------------------- + + +class _JIVECovariance: + r"""Heteroskedasticity-robust sandwich covariance for IVJIVE. + + The JIVE estimator solves :math:`\tilde{X}'X\hat\beta = \tilde{X}'y`, + where :math:`\tilde{X}_i` is the leave-one-out first-stage prediction. + The sandwich covariance is + + .. math:: + + n^{-1} (\tilde{X}'X/n)^{-1} + \Bigl(n^{-1}\sum_i \hat\epsilon_i^2 \tilde{x}_i \tilde{x}_i'\Bigr) + (\tilde{X}'X/n)^{-1} + + Parameters + ---------- + x : ndarray, shape (n, k) + Weighted regressors. + y : ndarray, shape (n, 1) + Weighted dependent variable. + x_loo : ndarray, shape (n, k) + Leave-one-out first-stage predictions. + params : ndarray, shape (k,) + JIVE coefficient estimates. + debiased : bool + Apply small-sample degree-of-freedom adjustment. + """ + + def __init__( + self, + x: linearmodels.typing.data.Float64Array, + y: linearmodels.typing.data.Float64Array, + x_loo: linearmodels.typing.data.Float64Array, + params: linearmodels.typing.data.Float64Array, + debiased: bool = False, + ) -> None: + self.x = x + self.y = y + self.x_loo = x_loo + self.params = params + self._debiased = debiased + self.eps = y - x @ params + nobs, nvar = x.shape + self._scale: float = nobs / (nobs - nvar) if debiased else 1.0 + self._name = "JIVE Covariance (Heteroskedastic)" + + @property + def cov(self) -> linearmodels.typing.data.Float64Array: + """Heteroskedasticity-robust covariance matrix.""" + x, x_loo, eps = self.x, self.x_loo, self.eps + nobs = x.shape[0] + bread = inv(x_loo.T @ x / nobs) + scores = x_loo * eps + meat = self._scale * scores.T @ scores / nobs + c = bread @ meat @ bread.T / nobs + return (c + c.T) / 2 + + @property + def s2(self) -> float: + """Estimated residual variance.""" + nobs = self.x.shape[0] + return float(self._scale * squeeze(self.eps.T @ self.eps) / nobs) + + @property + def debiased(self) -> bool: + """Flag indicating whether covariance is degree-of-freedom adjusted.""" + return self._debiased + + @property + def config(self) -> dict[str, Any]: + """Covariance configuration parameters.""" + return {"debiased": self.debiased} + + def __str__(self) -> str: + return f"{self._name}\nDebiased: {self._debiased}" + + def __repr__(self) -> str: + return self.__str__() + "\n" + self.__class__.__name__ + f", id: {hex(id(self))}" + + +# --------------------------------------------------------------------------- +# IVJIVE +# --------------------------------------------------------------------------- + + +class IVJIVE(_IVModelBase): + r""" + Jackknife Instrumental Variables Estimator (JIVE). + + JIVE replaces the 2SLS first-stage fitted values :math:`\hat{X} = P_Z X` + with *leave-one-out* first-stage predictions + + .. math:: + + \tilde{X}_i = \frac{(P_Z X)_i - h_i X_i}{1 - h_i}, + \qquad h_i = z_i'(Z'Z)^{-1}z_i, + + and defines the estimator as + + .. math:: + + \hat{\beta}_{\text{JIVE}} + = \bigl(\tilde{X}'X\bigr)^{-1} \tilde{X}'y. + + By using leave-one-out predictions, JIVE eliminates the finite-sample + bias of 2SLS that arises when the number of instruments is large relative + to the sample size. + + Parameters + ---------- + dependent : array_like + Endogenous variables (nobs by 1). + exog : array_like or None + Exogenous regressors (nobs by nexog). + endog : array_like or None + Endogenous regressors (nobs by nendog). + instruments : array_like or None + Excluded instruments (nobs by ninstr). + weights : array_like or None + Observation weights used in estimation. + + Notes + ----- + ``Z`` is the full instrument matrix ``[exog, instruments]``. The + hat-matrix leverage scores :math:`h_i` are computed without forming the + full :math:`n \times n` hat matrix: + + .. math:: + + h_i = z_i'(Z'Z)^{-1}z_i + = \bigl[Z(Z'Z)^{-1}\bigr]_i \cdot z_i. + + Standard errors are always heteroskedasticity-robust (sandwich form + with :math:`\tilde{X}` as the score instrument). + + See Also + -------- + IV2SLS, IVLIML, IVGMM, IVGMMCUE + + References + ---------- + Angrist, J. D., Imbens, G. W., & Krueger, A. B. (1999). + Jackknife instrumental variables estimation. + *Journal of Applied Econometrics*, 14(1), 57-67. + + Examples + -------- + >>> import numpy as np + >>> from linearmodels.iv import IVJIVE + >>> rng = np.random.default_rng(0) + >>> n = 500 + >>> z = rng.standard_normal((n, 3)) + >>> x_endog = z[:, 0] + rng.standard_normal(n) + >>> exog = np.ones((n, 1)) + >>> y = x_endog + rng.standard_normal(n) + >>> mod = IVJIVE(y, exog, x_endog[:, None], z) + >>> res = mod.fit() + >>> float(res.params.iloc[-1]) # doctest: +SKIP + 1.0... + """ + + def __init__( + self, + dependent: IVDataLike, + exog: IVDataLike | None, + endog: IVDataLike | None, + instruments: IVDataLike | None, + *, + weights: IVDataLike | None = None, + ) -> None: + self._method = "IV-JIVE" + super().__init__(dependent, exog, endog, instruments, weights=weights) + + @staticmethod + def from_formula( + formula: str, data: DataFrame, *, weights: IVDataLike | None = None + ) -> IVJIVE: + """ + Parameters + ---------- + formula : str + Formula modified for the IV syntax described in the notes section. + data : DataFrame + DataFrame containing the variables used in the formula. + weights : array_like, optional + Observation weights used in estimation. + + Returns + ------- + IVJIVE + Model instance. + + Notes + ----- + The IV formula modifies the standard formula syntax to include a block + of the form ``[endog ~ instruments]``. + + Examples + -------- + >>> from linearmodels.datasets import wage + >>> from linearmodels.iv import IVJIVE + >>> data = wage.load() + >>> formula = 'np.log(wage) ~ 1 + exper + exper ** 2 + [educ ~ sibs + brthord]' + >>> mod = IVJIVE.from_formula(formula, data) + """ + parser = IVFormulaParser(formula, data) + dep, exog, endog, instr = parser.data + mod = IVJIVE(dep, exog, endog, instr, weights=weights) + mod.formula = formula + return mod + + def fit( + self, *, cov_type: str = "robust", debiased: bool = False, **cov_config: Any + ) -> IVResults: + """ + Estimate model parameters using the jackknife IV estimator. + + Parameters + ---------- + cov_type : str + Accepted for API compatibility; JIVE always uses a + heteroskedasticity-robust sandwich covariance. + debiased : bool + Apply a small-sample degree-of-freedom adjustment. + **cov_config + Additional keyword arguments (accepted for API compatibility). + + Returns + ------- + IVResults + Estimation results. + + References + ---------- + Angrist, J. D., Imbens, G. W., & Krueger, A. B. (1999). + Jackknife instrumental variables estimation. + *Journal of Applied Econometrics*, 14(1), 57-67. + """ + wy, wx, wz = self._wy, self._wx, self._wz + + # --- Leverage scores (efficient: no n×n hat matrix) --- + ZtZ_inv = inv(wz.T @ wz) + A = wz @ ZtZ_inv # (n, ninstr) + h = (A * wz).sum(axis=1) # (n,) h_i = z_i'(Z'Z)^{-1}z_i + + if npany(h >= 1.0 - 1e-10): + raise ValueError( + "Perfect leverage detected (h_i ≈ 1) in at least one " + "observation. JIVE requires all leverage scores to be " + "strictly less than 1." + ) + + # --- First-stage predictions P_Z X --- + PZX = A @ (wz.T @ wx) # (n, k) + + # --- Leave-one-out predictions X̃_i = (PZX_i - h_i X_i)/(1-h_i) --- + X_loo = (PZX - h[:, None] * wx) / (1.0 - h)[:, None] + + # --- JIVE parameter estimate --- + params = inv(X_loo.T @ wx) @ (X_loo.T @ wy) # (k, 1) + + # --- Covariance --- + cov_est = _JIVECovariance(wx, wy, X_loo, params, debiased=debiased) + + pe = self._post_estimation(params, cov_est, "robust") + return IVResults(pe, self) diff --git a/linearmodels/iv/tests/test_jive.py b/linearmodels/iv/tests/test_jive.py new file mode 100644 index 0000000000..989265d58d --- /dev/null +++ b/linearmodels/iv/tests/test_jive.py @@ -0,0 +1,306 @@ +"""Tests for IVJIVE (Jackknife Instrumental Variables Estimator). + +Reference: Angrist, Imbens & Krueger (1999), Journal of Applied Econometrics. +""" + +from __future__ import annotations + +import numpy as np +import pytest +from numpy.testing import assert_allclose +from pandas import DataFrame + +from linearmodels.iv import IV2SLS, IVJIVE +from linearmodels.iv.results import IVResults + + +# --------------------------------------------------------------------------- +# DGP helpers +# --------------------------------------------------------------------------- + + +def _make_iv(n=500, k_instr=3, beta=1.5, seed=0): + """Simple IV DGP: one endogenous regressor, k_instr excluded instruments.""" + rng = np.random.default_rng(seed) + z = rng.standard_normal((n, k_instr)) + v = rng.standard_normal(n) + e = rng.standard_normal(n) + 0.5 * v # endogenous error + x_endog = z[:, 0] * 0.8 + v + y = x_endog * beta + e + exog = np.ones((n, 1)) + return y, exog, x_endog[:, None], z + + +def _make_iv_many(n=500, k_instr=15, beta=1.5, seed=1): + """Many-IV DGP with weak first stage — where JIVE matters most.""" + rng = np.random.default_rng(seed) + z = rng.standard_normal((n, k_instr)) + gamma = np.ones(k_instr) * 0.2 # weak instruments + v = rng.standard_normal(n) + x_endog = z @ gamma + v + e = rng.standard_normal(n) + 0.5 * v + y = x_endog * beta + e + exog = np.ones((n, 1)) + return y, exog, x_endog[:, None], z + + +# --------------------------------------------------------------------------- +# Import and basic API +# --------------------------------------------------------------------------- + + +def test_import(): + """IVJIVE is importable from linearmodels.iv.""" + from linearmodels.iv import IVJIVE # noqa: F401 + + +def test_method_name(): + y, exog, endog, z = _make_iv() + res = IVJIVE(y, exog, endog, z).fit() + assert res.model._method == "IV-JIVE" + + +def test_fit_returns_iv_results(): + y, exog, endog, z = _make_iv() + res = IVJIVE(y, exog, endog, z).fit() + assert isinstance(res, IVResults) + + +# --------------------------------------------------------------------------- +# Output shapes and structure +# --------------------------------------------------------------------------- + + +def test_params_shape_one_endog(): + y, exog, endog, z = _make_iv() + res = IVJIVE(y, exog, endog, z).fit() + assert res.params.shape == (2,) # const + endog + + +def test_params_labels(): + y, exog, endog, z = _make_iv() + y_df = DataFrame({"y": y}) + exog_df = DataFrame({"const": np.ones(len(y))}) + endog_df = DataFrame({"x": endog.squeeze()}) + z_df = DataFrame(z, columns=[f"z{i}" for i in range(z.shape[1])]) + res = IVJIVE(y_df, exog_df, endog_df, z_df).fit() + assert "x" in res.params.index + + +def test_std_errors_shape(): + y, exog, endog, z = _make_iv() + res = IVJIVE(y, exog, endog, z).fit() + assert res.std_errors.shape == (2,) + + +def test_std_errors_positive(): + y, exog, endog, z = _make_iv() + res = IVJIVE(y, exog, endog, z).fit() + assert np.all(res.std_errors > 0) + + +def test_cov_shape(): + y, exog, endog, z = _make_iv() + res = IVJIVE(y, exog, endog, z).fit() + assert res.cov.shape == (2, 2) + + +def test_cov_symmetric(): + y, exog, endog, z = _make_iv() + res = IVJIVE(y, exog, endog, z).fit() + assert_allclose(res.cov.values, res.cov.values.T, atol=1e-12) + + +def test_cov_psd(): + y, exog, endog, z = _make_iv() + res = IVJIVE(y, exog, endog, z).fit() + eigvals = np.linalg.eigvalsh(res.cov.values) + assert np.all(eigvals > -1e-10) + + +def test_r2_finite(): + y, exog, endog, z = _make_iv() + res = IVJIVE(y, exog, endog, z).fit() + assert np.isfinite(res.rsquared) + + +def test_residuals_shape(): + y, exog, endog, z = _make_iv(n=300) + res = IVJIVE(y, exog, endog, z).fit() + assert res.resids.shape == (300,) + + +# --------------------------------------------------------------------------- +# Consistency: JIVE → true parameter as n → ∞ +# --------------------------------------------------------------------------- + + +def test_jive_consistent_large_n(): + """With a large sample and strong instruments, JIVE should be close to 1.5.""" + y, exog, endog, z = _make_iv(n=3000, k_instr=3, beta=1.5, seed=10) + res = IVJIVE(y, exog, endog, z).fit() + assert_allclose(float(res.params.iloc[-1]), 1.5, atol=0.15) + + +def test_jive_close_to_2sls_few_instruments(): + """With few instruments, JIVE ≈ 2SLS (both consistent, similar finite samples).""" + y, exog, endog, z = _make_iv(n=2000, k_instr=2, beta=1.5, seed=7) + jive = IVJIVE(y, exog, endog, z).fit() + tsls = IV2SLS(y, exog, endog, z).fit() + assert_allclose( + float(jive.params.iloc[-1]), + float(tsls.params.iloc[-1]), + atol=0.15, + ) + + +def test_jive_less_biased_than_2sls_many_instruments(): + """In a many-IV setting, JIVE point estimate should be closer to the truth.""" + y, exog, endog, z = _make_iv_many(n=2000, k_instr=15, beta=1.5, seed=42) + jive = IVJIVE(y, exog, endog, z).fit() + tsls = IV2SLS(y, exog, endog, z).fit() + beta = 1.5 + jive_err = abs(float(jive.params.iloc[-1]) - beta) + tsls_err = abs(float(tsls.params.iloc[-1]) - beta) + # JIVE need not always win a single draw; allow generous test + assert jive_err < tsls_err + 0.3 + + +# --------------------------------------------------------------------------- +# Leverage scores +# --------------------------------------------------------------------------- + + +def test_leverage_scores_in_unit_interval(): + """All leverage scores h_i must be in [0, 1).""" + y, exog, endog, z = _make_iv(n=300) + mod = IVJIVE(y, exog, endog, z) + wx, wz = mod._wx, mod._wz + from numpy.linalg import inv as _inv + ZtZ_inv = _inv(wz.T @ wz) + A = wz @ ZtZ_inv + h = (A * wz).sum(axis=1) + assert np.all(h >= 0) + assert np.all(h < 1) + + +def test_leverage_sum_equals_rank(): + """Sum of leverage scores equals the rank of Z (number of instrument columns).""" + y, exog, endog, z = _make_iv(n=300, k_instr=4) + mod = IVJIVE(y, exog, endog, z) + wx, wz = mod._wx, mod._wz + from numpy.linalg import inv as _inv + ZtZ_inv = _inv(wz.T @ wz) + A = wz @ ZtZ_inv + h = (A * wz).sum(axis=1) + # Sum h_i = trace(P_Z) = rank(Z) = ncols of wz + assert_allclose(h.sum(), wz.shape[1], atol=1e-8) + + +# --------------------------------------------------------------------------- +# debiased flag +# --------------------------------------------------------------------------- + + +def test_debiased_larger_stderr(): + """Debiased SEs should be >= undebiased SEs (both positive).""" + y, exog, endog, z = _make_iv() + res = IVJIVE(y, exog, endog, z).fit(debiased=False) + res_db = IVJIVE(y, exog, endog, z).fit(debiased=True) + assert np.all(res_db.std_errors.values >= res.std_errors.values - 1e-12) + + +def test_debiased_flag_stored(): + y, exog, endog, z = _make_iv() + res = IVJIVE(y, exog, endog, z).fit(debiased=True) + assert res.model._method == "IV-JIVE" + + +# --------------------------------------------------------------------------- +# from_formula +# --------------------------------------------------------------------------- + + +def test_from_formula(): + """from_formula should produce the same estimates as the array API.""" + y, exog, endog, z = _make_iv(n=400, seed=5) + n = len(y) + data = DataFrame({ + "y": y.squeeze(), + "x": endog.squeeze(), + "z0": z[:, 0], "z1": z[:, 1], "z2": z[:, 2], + }) + res_array = IVJIVE(y, exog, endog, z).fit() + res_formula = IVJIVE.from_formula("y ~ 1 + [x ~ z0 + z1 + z2]", data).fit() + assert_allclose( + res_array.params.values, + res_formula.params.values, + atol=1e-10, + ) + + +# --------------------------------------------------------------------------- +# Covariance estimator object +# --------------------------------------------------------------------------- + + +def test_cov_estimator_type_string(): + """IVResults.cov_estimator is the cov_type string (linearmodels API).""" + y, exog, endog, z = _make_iv() + res = IVJIVE(y, exog, endog, z).fit() + assert isinstance(res.cov_estimator, str) + assert res.cov_estimator == "robust" + + +def test_cov_config_stored(): + """cov_config dict is accessible and includes debiased key.""" + y, exog, endog, z = _make_iv() + res = IVJIVE(y, exog, endog, z).fit(debiased=True) + assert "debiased" in res.cov_config + assert res.cov_config["debiased"] is True + + +# --------------------------------------------------------------------------- +# Weights +# --------------------------------------------------------------------------- + + +def test_weighted_estimation_runs(): + y, exog, endog, z = _make_iv(n=300) + n = len(y) + weights = np.abs(np.random.default_rng(0).standard_normal(n)) + 0.1 + res = IVJIVE(y, exog, endog, z, weights=weights).fit() + assert np.isfinite(float(res.params.iloc[-1])) + + +# --------------------------------------------------------------------------- +# Multiple endogenous regressors +# --------------------------------------------------------------------------- + + +def test_two_endog_regressors(): + """JIVE with two endogenous regressors and enough instruments.""" + rng = np.random.default_rng(3) + n, k_instr = 800, 5 + z = rng.standard_normal((n, k_instr)) + v1, v2 = rng.standard_normal(n), rng.standard_normal(n) + x1 = z[:, 0] + v1 + x2 = z[:, 1] + v2 + y = x1 * 1.0 + x2 * 2.0 + rng.standard_normal(n) + 0.3 * (v1 + v2) + exog = np.ones((n, 1)) + endog = np.column_stack([x1, x2]) + res = IVJIVE(y, exog, endog, z).fit() + assert res.params.shape == (3,) # const + 2 endog + assert np.all(np.isfinite(res.params.values)) + + +# --------------------------------------------------------------------------- +# Summary +# --------------------------------------------------------------------------- + + +def test_summary_runs(): + y, exog, endog, z = _make_iv() + res = IVJIVE(y, exog, endog, z).fit() + s = res.summary + assert s is not None