From f96a350aa66c0f3897861dc91142f186e46b5ced Mon Sep 17 00:00:00 2001
From: Alexey Stukalov
Date: Mon, 11 May 2026 18:27:35 -0700
Subject: [PATCH 01/41] WIP SemImpliedState

---
 src/types.jl | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/src/types.jl b/src/types.jl
index eb251a3b..f1ed3d12 100644
--- a/src/types.jl
+++ b/src/types.jl
@@ -46,6 +46,8 @@ If you have a special kind of data, e.g. ordinal data, you should implement a su
 abstract type SemObserved end
 
 """
+    abstract type SemImplied
+
 Supertype of all objects that can serve as the implied field of a SEM.
 Computes model-implied values that should be compared with the observed data
 to find parameter estimates, e. g. the model implied covariance or mean.
@@ -56,6 +58,22 @@ abstract type SemImplied end
 "Subtype of SemImplied for all objects that can serve as the implied field of a SEM and use some form of symbolic precomputation."
 abstract type SemImpliedSymbolic <: SemImplied end
 
+"""
+    abstract type SemImpliedState{I <: SemImplied}
+
+State of [`SemImplied`](@ref) that corresponds to the specific SEM parameter values.
+
+Contains the necessary vectors and matrices for calculating the SEM
+objective, gradient and hessian (whichever is requested).
+"""
+abstract type SemImpliedState{I <: SemImplied} end
+
+impliedtype(::Type{SemImpliedState{I}}) where {I} = I
+impliedtype(state::SemImpliedState) = impliedtype(typeof(state))
+implied(state::SemImpliedState) = state.implied
+MeanStruct(::Type{S}) where {S <: SemImpliedState} = MeanStruct(impliedtype(S))
+HessianEval(::Type{S}) where {S <: SemImpliedState} = HessianEval(impliedtype(S))
+
 """
     abstract type SemLoss{O <: SemObserved, I <: SemImplied} <: AbstractLoss

From 9381ae30e9074496c79b2990559eff8ecefe4cb0 Mon Sep 17 00:00:00 2001
From: Alexey Stukalov
Date: Mon, 11 May 2026 18:27:35 -0700
Subject: [PATCH 02/41] declare cov matrices symmetric

---
 src/frontend/fit/fitmeasures/minus2ll.jl | 2 +-
 src/observed/covariance.jl               | 4 ++--
 src/observed/data.jl                     | 6 +++---
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/frontend/fit/fitmeasures/minus2ll.jl b/src/frontend/fit/fitmeasures/minus2ll.jl
index 1cdf5c07..661c1ee0 100644
--- a/src/frontend/fit/fitmeasures/minus2ll.jl
+++ b/src/frontend/fit/fitmeasures/minus2ll.jl
@@ -39,7 +39,7 @@ function minus2ll(observed::SemObservedMissing)
     # FIXME: this code is duplicate to objective(fiml, ...)
     F = sum(observed.patterns) do pat
         # implied covariance/mean
-        Σᵢ = Σ[pat.measured_mask, pat.measured_mask]
+        Σᵢ = Symmetric(Σ[pat.measured_mask, pat.measured_mask])
         Σᵢ_chol = cholesky!(Σᵢ)
         ld = logdet(Σᵢ_chol)
         Σᵢ⁻¹ = LinearAlgebra.inv!(Σᵢ_chol)
diff --git a/src/observed/covariance.jl b/src/observed/covariance.jl
index f81fe8e5..43960f62 100644
--- a/src/observed/covariance.jl
+++ b/src/observed/covariance.jl
@@ -3,7 +3,7 @@
 Type alias for [`SemObservedData`](@ref) that has mean and covariance, but no actual data.
 
 For instances of `SemObservedCovariance` [`samples`](@ref) returns `nothing`.
""" -const SemObservedCovariance{S} = SemObservedData{Nothing, S} +const SemObservedCovariance{C, S} = SemObservedData{Nothing, C, S} """ SemObservedCovariance(; @@ -76,5 +76,5 @@ function SemObservedCovariance(; obs_mean = obs_mean[obs_vars_perm] end - return SemObservedData(nothing, obs_vars, obs_cov, obs_mean, nsamples) + return SemObservedData(nothing, obs_vars, Symmetric(obs_cov), obs_mean, nsamples) end diff --git a/src/observed/data.jl b/src/observed/data.jl index 30d433e0..39eebe30 100644 --- a/src/observed/data.jl +++ b/src/observed/data.jl @@ -23,10 +23,10 @@ For observed data without missings. - `obs_cov(::SemObservedData)` -> observed covariance matrix - `obs_mean(::SemObservedData)` -> observed mean vector """ -struct SemObservedData{D <: Union{Nothing, AbstractMatrix}, S <: Number} <: SemObserved +struct SemObservedData{D <: Union{Nothing, AbstractMatrix}, C, S <: Number} <: SemObserved data::D observed_vars::Vector{Symbol} - obs_cov::Matrix{S} + obs_cov::C obs_mean::Vector{S} nsamples::Int end @@ -58,7 +58,7 @@ function SemObservedData(; throw end - return SemObservedData(data, obs_vars, obs_cov, vec(obs_mean), size(data, 1)) + return SemObservedData(data, obs_vars, Symmetric(obs_cov), vec(obs_mean), size(data, 1)) end ############################################################################################ From 3c26606b074b9de58cf6be0e48c9da1942c16e03 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Mon, 11 May 2026 18:27:35 -0700 Subject: [PATCH 03/41] RAM: reuse sigma array --- src/loss/ML/ML.jl | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/loss/ML/ML.jl b/src/loss/ML/ML.jl index cf119832..af6425dd 100644 --- a/src/loss/ML/ML.jl +++ b/src/loss/ML/ML.jl @@ -208,7 +208,12 @@ function evaluate!(objective, gradient, hessian, loss::SemML, par) ∇A = implied.∇A ∇S = implied.∇S - C = F⨉I_A⁻¹' * (I - Σ⁻¹Σₒ) * Σ⁻¹ * F⨉I_A⁻¹ + # reuse Σ⁻¹Σₒ to calculate I-Σ⁻¹Σₒ + one_Σ⁻¹Σₒ = Σ⁻¹Σₒ + one_Σ⁻¹Σₒ .*= -1 + one_Σ⁻¹Σₒ[diagind(one_Σ⁻¹Σₒ)] .+= 1 + + C = F⨉I_A⁻¹' * one_Σ⁻¹Σₒ * Σ⁻¹ * F⨉I_A⁻¹ mul!(gradient, ∇A', vec(C * S * I_A⁻¹'), 2, 0) mul!(gradient, ∇S', vec(C), 1, 1) From 9fc99c3cc44685de872ea6b87879da430b5cca11 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Mon, 11 May 2026 18:27:35 -0700 Subject: [PATCH 04/41] RAM: optional sparse Sigma matrix --- src/implied/RAM/generic.jl | 5 ++++- src/loss/ML/ML.jl | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/implied/RAM/generic.jl b/src/implied/RAM/generic.jl index 1569b341..58b689f8 100644 --- a/src/implied/RAM/generic.jl +++ b/src/implied/RAM/generic.jl @@ -90,6 +90,7 @@ function RAM( spec::SemSpecification; #vech = false, gradient_required = true, + sparse_S::Bool = true, kwargs..., ) ram_matrices = convert(RAMMatrices, spec) @@ -102,7 +103,9 @@ function RAM( #preallocate arrays rand_params = randn(Float64, n_par) A_pre = check_acyclic(materialize(ram_matrices.A, rand_params)) - S_pre = materialize(ram_matrices.S, rand_params) + S_pre = Symmetric( + (sparse_S ? 
sparse_materialize : materialize)(ram_matrices.S, rand_params),
+    )
     F = copy(ram_matrices.F)
 
     # pre-allocate some matrices
diff --git a/src/loss/ML/ML.jl b/src/loss/ML/ML.jl
index af6425dd..534801ff 100644
--- a/src/loss/ML/ML.jl
+++ b/src/loss/ML/ML.jl
@@ -214,7 +214,7 @@ function evaluate!(objective, gradient, hessian, loss::SemML, par)
     one_Σ⁻¹Σₒ[diagind(one_Σ⁻¹Σₒ)] .+= 1
 
     C = F⨉I_A⁻¹' * one_Σ⁻¹Σₒ * Σ⁻¹ * F⨉I_A⁻¹
-    mul!(gradient, ∇A', vec(C * S * I_A⁻¹'), 2, 0)
+    mul!(gradient, ∇A', vec(C * mul!(similar(C), S, I_A⁻¹')), 2, 0)
     mul!(gradient, ∇S', vec(C), 1, 1)
 
     if MeanStruct(implied) === HasMeanStruct

From 9fc99c3cc44685de872ea6b87879da430b5cca11 Mon Sep 17 00:00:00 2001
From: Alexey Stukalov
Date: Mon, 11 May 2026 18:27:35 -0700
Subject: [PATCH 05/41] ML: refactor to minimize allocs

* preallocate matrices for intermediate gradient calculation
* call mul!() with these preallocated matrices
* annotate mul!() arguments as triangular/symmetric to use faster routines
---
 src/loss/ML/ML.jl | 95 ++++++++++++++++++++++++++++++-----------------
 1 file changed, 61 insertions(+), 34 deletions(-)

diff --git a/src/loss/ML/ML.jl b/src/loss/ML/ML.jl
index 534801ff..5052aa8d 100644
--- a/src/loss/ML/ML.jl
+++ b/src/loss/ML/ML.jl
@@ -27,13 +27,19 @@ my_ml = SemML(my_observed, my_implied)
 Analytic gradients are available, and for models without a meanstructure and
 `RAMSymbolic` implied type, also analytic hessians.
 """
-struct SemML{O, I, HE <: HessianEval, INV, M, M2} <: SemLoss{O, I}
+struct SemML{O, I, HE <: HessianEval, M} <: SemLoss{O, I}
     observed::O
     implied::I
     hessianeval::HE
-    Σ⁻¹::INV
-    Σ⁻¹Σₒ::M
-    meandiff::M2
+
+    # pre-allocated arrays to store intermediate results in evaluate!()
+    obsXobs_1::M
+    obsXobs_2::M
+    obsXobs_3::M
+    obsXvar_1::M
+    varXvar_1::M
+    varXvar_2::M
+    varXvar_3::M
 end
 
############################################################################################
@@ -63,26 +69,27 @@ function SemML(
     end
     # check integrity
     check_observed_vars(observed, implied)
+    @assert isnothing(refloss) || (
+        nobserved_vars(refloss.observed) == nobserved_vars(observed) &&
+        nvars(refloss.implied) == nvars(implied)
+    )
 
     he = approximate_hessian ? ApproxHessian() : ExactHessian()
-    obsmean = obs_mean(observed)
-    obscov = obs_cov(observed)
-    meandiff = isnothing(obsmean) ? nothing : copy(obsmean)
-
-    return SemML{
-        typeof(observed),
-        typeof(implied),
-        typeof(he),
-        typeof(obscov),
-        typeof(obscov),
-        typeof(meandiff),
-    }(
+    obsXobs = parent(obs_cov(observed))
+    nobs = nobserved_vars(observed)
+    nvar = nvars(implied)
+
+    return SemML{typeof(observed), typeof(implied), typeof(he), typeof(obsXobs)}(
         observed,
         implied,
         he,
-        similar(obscov),
-        similar(obscov),
-        meandiff,
+        isnothing(refloss) ? similar(obsXobs) : refloss.obsXobs_1,
+        isnothing(refloss) ? similar(obsXobs) : refloss.obsXobs_2,
+        isnothing(refloss) ? similar(obsXobs) : refloss.obsXobs_3,
+        isnothing(refloss) ? similar(obsXobs, (nobs, nvar)) : refloss.obsXvar_1,
+        isnothing(refloss) ? similar(obsXobs, (nvar, nvar)) : refloss.varXvar_1,
+        isnothing(refloss) ? similar(obsXobs, (nvar, nvar)) : refloss.varXvar_2,
+        isnothing(refloss) ?
similar(obsXobs, (nvar, nvar)) : refloss.varXvar_3, ) end @@ -109,10 +116,8 @@ function evaluate!( Σ = implied.Σ Σₒ = obs_cov(observed(loss)) - Σ⁻¹Σₒ = loss.Σ⁻¹Σₒ - Σ⁻¹ = loss.Σ⁻¹ - copyto!(Σ⁻¹, Σ) + Σ⁻¹ = copy!(loss.obsXobs_1, Σ) Σ_chol = cholesky!(Symmetric(Σ⁻¹); check = false) if !isposdef(Σ_chol) #@warn "∑⁻¹ is not positive definite" @@ -123,7 +128,7 @@ function evaluate!( end ld = logdet(Σ_chol) Σ⁻¹ = LinearAlgebra.inv!(Σ_chol) - mul!(Σ⁻¹Σₒ, Σ⁻¹, Σₒ) + Σ⁻¹Σₒ = mul!(loss.obsXobs_2, Σ⁻¹, Σₒ) isnothing(objective) || (objective = ld + tr(Σ⁻¹Σₒ)) if MeanStruct(implied) === HasMeanStruct @@ -136,12 +141,15 @@ function evaluate!( ∇Σ = implied.∇Σ ∇μ = implied.∇μ μ₋ᵀΣ⁻¹ = μ₋' * Σ⁻¹ - mul!(gradient, ∇Σ', vec(Σ⁻¹ - Σ⁻¹Σₒ * Σ⁻¹ - μ₋ᵀΣ⁻¹'μ₋ᵀΣ⁻¹)) + J = copyto!(loss.obsXobs_3, Σ⁻¹) + mul!(J, Σ⁻¹Σₒ, Σ⁻¹, -1, 1) + mul!(J, μ₋ᵀΣ⁻¹', μ₋ᵀΣ⁻¹, -1, 1) + mul!(gradient, ∇Σ', vec(J)) mul!(gradient, ∇μ', μ₋ᵀΣ⁻¹', -2, 1) end elseif !isnothing(gradient) || !isnothing(hessian) ∇Σ = implied.∇Σ - Σ⁻¹ΣₒΣ⁻¹ = Σ⁻¹Σₒ * Σ⁻¹ + Σ⁻¹ΣₒΣ⁻¹ = mul!(loss.obsXobs_3, Σ⁻¹Σₒ, Σ⁻¹) J = vec(Σ⁻¹ - Σ⁻¹ΣₒΣ⁻¹)' if !isnothing(gradient) mul!(gradient, ∇Σ', J') @@ -175,9 +183,8 @@ function evaluate!(objective, gradient, hessian, loss::SemML, par) Σ = implied.Σ Σₒ = obs_cov(observed(loss)) - Σ⁻¹Σₒ = loss.Σ⁻¹Σₒ - Σ⁻¹ = loss.Σ⁻¹ - copyto!(Σ⁻¹, Σ) + + Σ⁻¹ = copy!(loss.obsXobs_1, Σ) Σ_chol = cholesky!(Symmetric(Σ⁻¹); check = false) if !isposdef(Σ_chol) #@warn "Σ⁻¹ is not positive definite" @@ -188,7 +195,7 @@ function evaluate!(objective, gradient, hessian, loss::SemML, par) end ld = logdet(Σ_chol) Σ⁻¹ = LinearAlgebra.inv!(Σ_chol) - mul!(Σ⁻¹Σₒ, Σ⁻¹, Σₒ) + Σ⁻¹Σₒ = mul!(loss.obsXobs_2, Σ⁻¹, Σₒ) if !isnothing(objective) objective = ld + tr(Σ⁻¹Σₒ) @@ -210,11 +217,25 @@ function evaluate!(objective, gradient, hessian, loss::SemML, par) # reuse Σ⁻¹Σₒ to calculate I-Σ⁻¹Σₒ one_Σ⁻¹Σₒ = Σ⁻¹Σₒ - one_Σ⁻¹Σₒ .*= -1 + lmul!(-1, one_Σ⁻¹Σₒ) one_Σ⁻¹Σₒ[diagind(one_Σ⁻¹Σₒ)] .+= 1 - C = F⨉I_A⁻¹' * one_Σ⁻¹Σₒ * Σ⁻¹ * F⨉I_A⁻¹ - mul!(gradient, ∇A', vec(C * mul!(similar(C), S, I_A⁻¹')), 2, 0) + C = mul!( + loss.varXvar_1, + F⨉I_A⁻¹', + mul!( + loss.obsXvar_1, + Symmetric(mul!(loss.obsXobs_3, one_Σ⁻¹Σₒ, Σ⁻¹)), + F⨉I_A⁻¹, + ), + ) + mul!( + gradient, + ∇A', + vec(mul!(loss.varXvar_3, Symmetric(C), mul!(loss.varXvar_2, S, I_A⁻¹'))), + 2, + 0, + ) mul!(gradient, ∇S', vec(C), 1, 1) if MeanStruct(implied) === HasMeanStruct @@ -226,8 +247,14 @@ function evaluate!(objective, gradient, hessian, loss::SemML, par) μ₋ᵀΣ⁻¹ = μ₋' * Σ⁻¹ k = μ₋ᵀΣ⁻¹ * F⨉I_A⁻¹ mul!(gradient, ∇M', k', -2, 1) - mul!(gradient, ∇A', vec(k' * (I_A⁻¹ * (M + S * k'))'), -2, 1) - mul!(gradient, ∇S', vec(k'k), -1, 1) + mul!( + gradient, + ∇A', + vec(mul!(loss.varXvar_1, k', (I_A⁻¹ * (M + S * k'))')), + -2, + 1, + ) + mul!(gradient, ∇S', vec(mul!(loss.varXvar_2, k', k)), -1, 1) end end From e37e026f1b7a72e3ba9e44fcafe4efdd4204ed7a Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Mon, 11 May 2026 18:27:35 -0700 Subject: [PATCH 06/41] add PackageExtensionCompat --- Project.toml | 1 + src/StructuralEquationModels.jl | 8 +++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index eabc5b36..f56b4ed7 100644 --- a/Project.toml +++ b/Project.toml @@ -14,6 +14,7 @@ LineSearches = "d3d80556-e9d4-5f37-9878-2ab0fcc64255" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" NLSolversBase = "d41bc354-129a-5804-8e4c-c37616107c6c" Optim = "429524aa-4258-5aef-a3af-852621145aeb" +PackageExtensionCompat = "65ce6f38-6b18-4e1d-a461-8949797d7930" Pkg = 
"44cfe95a-1eb2-52ea-b672-e2afdf69b78f" PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d" ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca" diff --git a/src/StructuralEquationModels.jl b/src/StructuralEquationModels.jl index 0dbcd16a..5c3b6030 100644 --- a/src/StructuralEquationModels.jl +++ b/src/StructuralEquationModels.jl @@ -15,7 +15,8 @@ using LinearAlgebra, LazyArtifacts, DelimitedFiles, DataFrames, - ProgressMeter + ProgressMeter, + PackageExtensionCompat import StatsAPI: params, coef, coefnames, dof, fit, nobs, coeftable @@ -210,4 +211,9 @@ export AbstractSem, ←, ↔, ⇔ + +function __init__() + @require_extensions +end + end From 2a0d13ed867823dc5d00d3d18202aa4fbca8b7d5 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Mon, 11 May 2026 18:27:35 -0700 Subject: [PATCH 07/41] variance_params(SEMSpec) --- src/frontend/specification/ParameterTable.jl | 12 ++++++++++++ src/frontend/specification/RAMMatrices.jl | 11 +++++++++++ 2 files changed, 23 insertions(+) diff --git a/src/frontend/specification/ParameterTable.jl b/src/frontend/specification/ParameterTable.jl index c9b9dc24..102f81b2 100644 --- a/src/frontend/specification/ParameterTable.jl +++ b/src/frontend/specification/ParameterTable.jl @@ -405,6 +405,18 @@ function update_se_hessian!( return update_partable!(partable, :se, param_labels(fit), se) end +function variance_params(partable::ParameterTable) + res = [ + param for (param, rel, from, to) in zip( + partable.columns.param, + partable.columns.relation, + partable.columns.from, + partable.columns.to, + ) if (rel == :↔) && (from == to) + ] + unique!(res) +end + """ lavaan_params!(out::AbstractVector, partable_lav, partable::ParameterTable, diff --git a/src/frontend/specification/RAMMatrices.jl b/src/frontend/specification/RAMMatrices.jl index d430e9c0..ca77041d 100644 --- a/src/frontend/specification/RAMMatrices.jl +++ b/src/frontend/specification/RAMMatrices.jl @@ -62,6 +62,17 @@ function latent_vars(ram::RAMMatrices) end end +function variance_params(ram::RAMMatrices) + S_diaginds = Set(diagind(ram.S)) + varparams = Vector{Symbol}() + for (i, param) in enumerate(ram.params) + if any(∈(S_diaginds), param_occurences(ram.S, i)) + push!(varparams, param) + end + end + return unique!(varparams) +end + ############################################################################################ ### Constructor ############################################################################################ From 7c625d036112935d37a476e6c0d77691a12f2f0b Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Mon, 11 May 2026 18:27:35 -0700 Subject: [PATCH 08/41] fixup docstring --- src/frontend/specification/ParameterTable.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/frontend/specification/ParameterTable.jl b/src/frontend/specification/ParameterTable.jl index 102f81b2..daca3dee 100644 --- a/src/frontend/specification/ParameterTable.jl +++ b/src/frontend/specification/ParameterTable.jl @@ -293,11 +293,11 @@ function update_partable!( end """ - (1) update_partable!(partable::AbstractParameterTable, column, fitted:SemFit, params, default = nothing) - - (2) update_partable!(partable::AbstractParameterTable, column, param_labels::Vector{Symbol}, params, default = nothing) + update_partable!(partable::AbstractParameterTable, column, fitted:SemFit, params, default = nothing) -Add a new column to a parameter table. 
+ update_partable!(partable::AbstractParameterTable, column, param_labels::Vector{Symbol}, params, default = nothing) + +Add a new column to a parameter table. `column` is the name of the column, `params` contains the values of the new column, and `fitted` or `param_labels` is used to match the values to the correct parameter labels. The `default` value is used if a parameter in `partable` does not occur in `param_labels`. From 6ab19799ee266cf1cb8af763cbc53f21ba038747 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Mon, 11 May 2026 18:27:35 -0700 Subject: [PATCH 09/41] lavaan_model() --- src/frontend/specification/ParameterTable.jl | 75 ++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/src/frontend/specification/ParameterTable.jl b/src/frontend/specification/ParameterTable.jl index daca3dee..739ad571 100644 --- a/src/frontend/specification/ParameterTable.jl +++ b/src/frontend/specification/ParameterTable.jl @@ -552,3 +552,78 @@ lavaan_params( lav_col::Symbol = :est, lav_group = nothing, ) = lavaan_params!(fill(NaN, nparams(partable)), partable_lav, partable, lav_col, lav_group) + +""" + lavaan_model(partable::ParameterTable) + +Generate lavaan model definition from a `partable`. +""" +function lavaan_model(partable::ParameterTable) + latent_vars = Set(partable.variables.latent) + observed_vars = Set(partable.variables.observed) + + variance_defs = Dict{Symbol, IOBuffer}() + latent_dep_defs = Dict{Symbol, IOBuffer}() + latent_regr_defs = Dict{Symbol, IOBuffer}() + observed_regr_defs = Dict{Symbol, IOBuffer}() + + model = IOBuffer() + for (from, to, rel, param, value, free) in zip( + partable.columns.from, + partable.columns.to, + partable.columns.relation, + partable.columns.param, + partable.columns.value_fixed, + partable.columns.free, + ) + function append_param(io) + if free + @assert param != :const + param == Symbol("") || write(io, "$param * ") + else + write(io, "$value * ") + end + end + function append_rhs(io) + if position(io) > 0 + write(io, " + ") + end + append_param(io) + write(io, "$to") + end + + if from == Symbol("1") + write(model, "$to ~ ") + append_param(model) + write(model, "1\n") + else + if rel == :↔ + variance_def = get!(() -> IOBuffer(), variance_defs, from) + append_rhs(variance_def) + elseif rel == :→ + if (from ∈ latent_vars) && (to ∈ observed_vars) + latent_dep_def = get!(() -> IOBuffer(), latent_dep_defs, from) + append_rhs(latent_dep_def) + elseif (from ∈ latent_vars) && (to ∈ latent_vars) + latent_regr_def = get!(() -> IOBuffer(), latent_regr_defs, from) + append_rhs(latent_regr_def) + else + observed_regr_def = get!(() -> IOBuffer(), observed_regr_defs, from) + append_rhs(observed_regr_def) + end + end + end + end + function write_rules(io, defs, relation) + vars = sort!(collect(keys(defs))) + for var in vars + write(io, String(var), " ", relation, " ") + write(io, String(take!(defs[var])), "\n") + end + end + write_rules(model, latent_dep_defs, "=~") + write_rules(model, latent_regr_defs, "~") + write_rules(model, observed_regr_defs, "~") + write_rules(model, variance_defs, "~~") + return String(take!(model)) +end From 0081538a5385a229535b22c38f9517521621bb0c Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Mon, 11 May 2026 18:27:35 -0700 Subject: [PATCH 10/41] test_grad/hess(): check that alt calls give same results --- test/examples/helper.jl | 56 ++++++++++++++++++----------------------- 1 file changed, 25 insertions(+), 31 deletions(-) diff --git a/test/examples/helper.jl b/test/examples/helper.jl index fed95f3c..daf34bb4 
100644 --- a/test/examples/helper.jl +++ b/test/examples/helper.jl @@ -9,44 +9,38 @@ function test_gradient(model, params; rtol = 1e-10, atol = 0) @test nparams(model) == length(params) true_grad = FiniteDiff.finite_difference_gradient(Base.Fix1(objective!, model), params) - gradient = similar(params) - # F and G - fill!(gradient, NaN) - gradient!(gradient, model, params) - @test gradient ≈ true_grad rtol = rtol atol = atol + gradient_G = fill!(similar(params), NaN) + gradient!(gradient_G, model, params) + gradient_FG = fill!(similar(params), NaN) + objective_gradient!(gradient_FG, model, params) - # only G - fill!(gradient, NaN) - objective_gradient!(gradient, model, params) - @test gradient ≈ true_grad rtol = rtol atol = atol + @test gradient_G == gradient_FG + + #@info "G norm = $(norm(gradient_G - true_grad, Inf))" + @test gradient_G ≈ true_grad rtol = rtol atol = atol end function test_hessian(model, params; rtol = 1e-4, atol = 0) true_hessian = FiniteDiff.finite_difference_hessian(Base.Fix1(objective!, model), params) - hessian = similar(params, size(true_hessian)) - gradient = similar(params) - - # H - fill!(hessian, NaN) - hessian!(hessian, model, params) - @test hessian ≈ true_hessian rtol = rtol atol = atol - - # F and H - fill!(hessian, NaN) - objective_hessian!(hessian, model, params) - @test hessian ≈ true_hessian rtol = rtol atol = atol - - # G and H - fill!(hessian, NaN) - gradient_hessian!(gradient, hessian, model, params) - @test hessian ≈ true_hessian rtol = rtol atol = atol - - # F, G and H - fill!(hessian, NaN) - objective_gradient_hessian!(gradient, hessian, model, params) - @test hessian ≈ true_hessian rtol = rtol atol = atol + gradient = fill!(similar(params), NaN) + + hessian_H = fill!(similar(parent(true_hessian)), NaN) + hessian!(hessian_H, model, params) + + hessian_FH = fill!(similar(hessian_H), NaN) + objective_hessian!(hessian_FH, model, params) + + hessian_GH = fill!(similar(hessian_H), NaN) + gradient_hessian!(gradient, hessian_GH, model, params) + + hessian_FGH = fill!(similar(hessian_H), NaN) + objective_gradient_hessian!(gradient, hessian_FGH, model, params) + + @test hessian_H == hessian_FH == hessian_GH == hessian_FGH + + @test hessian_H ≈ true_hessian rtol = rtol atol = atol end # map from the SEM.jl name of the fit measure to the lavaan's one From 91085e95f30dbfc02dca441858015e09f53f8f88 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Mon, 11 May 2026 18:27:35 -0700 Subject: [PATCH 11/41] start_simple(): code cleanup also warn if params are overwritten --- .../start_val/start_simple.jl | 73 ++++++++++--------- 1 file changed, 40 insertions(+), 33 deletions(-) diff --git a/src/additional_functions/start_val/start_simple.jl b/src/additional_functions/start_val/start_simple.jl index afdbf92e..4f9d7063 100644 --- a/src/additional_functions/start_val/start_simple.jl +++ b/src/additional_functions/start_val/start_simple.jl @@ -33,52 +33,59 @@ function start_simple( start_means = 0.0, kwargs..., ) - A, S, F_ind, M, n_par = ram_matrices.A, - ram_matrices.S, - observed_var_indices(ram_matrices), - ram_matrices.M, - nparams(ram_matrices) + A, S, M = ram_matrices.A, ram_matrices.S, ram_matrices.M + obs_inds = Set(observed_var_indices(ram_matrices)) + C_indices = CartesianIndices(size(A)) - start_val = zeros(n_par) - n_var = nvars(ram_matrices) + start_vals = Vector{Float64}(undef, nparams(ram_matrices)) + for i in eachindex(start_vals) + par = 0.0 - C_indices = CartesianIndices((n_var, n_var)) - - for i in 1:n_par Si_ind = param_occurences(S, i) - Ai_ind = 
param_occurences(A, i) if length(Si_ind) != 0 # use the first occurence of the parameter to determine starting value c_ind = C_indices[Si_ind[1]] if c_ind[1] == c_ind[2] - if c_ind[1] ∈ F_ind - start_val[i] = start_variances_observed - else - start_val[i] = start_variances_latent - end + par = ifelse( + c_ind[1] ∈ obs_inds, + start_variances_observed, + start_variances_latent, + ) else - o1 = c_ind[1] ∈ F_ind - o2 = c_ind[2] ∈ F_ind - if o1 & o2 - start_val[i] = start_covariances_observed - elseif !o1 & !o2 - start_val[i] = start_covariances_latent - else - start_val[i] = start_covariances_obs_lat - end + o1 = c_ind[1] ∈ obs_inds + o2 = c_ind[2] ∈ obs_inds + par = ifelse( + o1 && o2, + start_covariances_observed, + ifelse(!o1 && !o2, start_covariances_latent, start_covariances_obs_lat), + ) end - elseif length(Ai_ind) != 0 + end + + Ai_ind = param_occurences(A, i) + if length(Ai_ind) != 0 + iszero(par) || + @warn "param[$i]=$(params(ram_matrices, i)) is already set to $par" c_ind = C_indices[Ai_ind[1]] - if (c_ind[1] ∈ F_ind) & !(c_ind[2] ∈ F_ind) - start_val[i] = start_loadings - else - start_val[i] = start_regressions + par = ifelse( + (c_ind[1] ∈ obs_inds) && !(c_ind[2] ∈ obs_inds), + start_loadings, + start_regressions, + ) + end + + if !isnothing(M) + Mi_inds = param_occurences(M, i) + if length(Mi_inds) != 0 + iszero(par) || + @warn "param[$i]=$(params(ram_matrices, i)) is already set to $par" + par = start_means end - elseif !isnothing(M) && (length(param_occurences(M, i)) != 0) - start_val[i] = start_means end + + start_vals[i] = par end - return start_val + return start_vals end # multigroup models From b81b77c7de4ef121103b4df1c0385d99015e73d7 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Mon, 11 May 2026 18:27:35 -0700 Subject: [PATCH 12/41] start_simple(): start vals for lat and obs means --- src/additional_functions/start_val/start_simple.jl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/additional_functions/start_val/start_simple.jl b/src/additional_functions/start_val/start_simple.jl index 4f9d7063..2228bc92 100644 --- a/src/additional_functions/start_val/start_simple.jl +++ b/src/additional_functions/start_val/start_simple.jl @@ -30,7 +30,8 @@ function start_simple( start_covariances_observed = 0.0, start_covariances_latent = 0.0, start_covariances_obs_lat = 0.0, - start_means = 0.0, + start_mean_latent = 0.0, + start_mean_observed = 0.0, kwargs..., ) A, S, M = ram_matrices.A, ram_matrices.S, ram_matrices.M @@ -79,7 +80,7 @@ function start_simple( if length(Mi_inds) != 0 iszero(par) || @warn "param[$i]=$(params(ram_matrices, i)) is already set to $par" - par = start_means + par = ifelse(Mi_inds[1] ∈ obs_inds, start_mean_observed, start_mean_latent) end end From 4ae98a10ae98513dc7c18dca353018727cea330b Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Mon, 11 May 2026 18:27:35 -0700 Subject: [PATCH 13/41] observed_vars(RAMMatrices; order): rows/cols order --- src/frontend/specification/RAMMatrices.jl | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/frontend/specification/RAMMatrices.jl b/src/frontend/specification/RAMMatrices.jl index ca77041d..866c9c12 100644 --- a/src/frontend/specification/RAMMatrices.jl +++ b/src/frontend/specification/RAMMatrices.jl @@ -36,17 +36,22 @@ end latent_var_indices(ram::RAMMatrices) = [i for i in axes(ram.F, 2) if islatent_var(ram, i)] -# observed variables in the order as they appear in ram.F rows -function observed_vars(ram::RAMMatrices) +# observed variables, if 
order=:rows, the order is as they appear in ram.F rows
+# if order=:columns, the order is as they appear in the combined variables list (ram.F columns)
+function observed_vars(ram::RAMMatrices; order::Symbol = :rows)
+    order ∈ [:rows, :columns] ||
+        throw(ArgumentError("order kwarg should be :rows or :columns"))
     if isnothing(ram.vars)
         @warn "Your RAMMatrices do not contain variable names. Please make sure the order of variables in your data is correct!"
         return nothing
     else
+        nobs = 0
         obs_vars = Vector{Symbol}(undef, nobserved_vars(ram))
         @inbounds for (i, v) in enumerate(vars(ram))
             colptr = ram.F.colptr[i]
             if ram.F.colptr[i+1] > colptr # is observed
-                obs_vars[ram.F.rowval[colptr]] = v
+                nobs += 1
+                obs_vars[order == :rows ? ram.F.rowval[colptr] : nobs] = v
             end
         end
         return obs_vars

From bb7aed45c35cff643e68d12246e2b468a331bb7d Mon Sep 17 00:00:00 2001
From: Alexey Stukalov
Date: Mon, 11 May 2026 18:27:36 -0700
Subject: [PATCH 14/41] observed_var_indices(::RAMMatrices; order=:columns)

---
 src/frontend/specification/RAMMatrices.jl | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/frontend/specification/RAMMatrices.jl b/src/frontend/specification/RAMMatrices.jl
index 866c9c12..e20f83d5 100644
--- a/src/frontend/specification/RAMMatrices.jl
+++ b/src/frontend/specification/RAMMatrices.jl
@@ -22,13 +22,16 @@ vars(ram::RAMMatrices) = ram.vars
 isobserved_var(ram::RAMMatrices, i::Integer) = ram.F.colptr[i+1] > ram.F.colptr[i]
 islatent_var(ram::RAMMatrices, i::Integer) = ram.F.colptr[i+1] == ram.F.colptr[i]
 
-# indices of observed variables in the order as they appear in ram.F rows
-function observed_var_indices(ram::RAMMatrices)
+# indices of observed variables, for order=:rows (default), the order is as they appear in ram.F rows
+# if order=:columns, the order is as they appear in the combined variables list (ram.F columns)
+function observed_var_indices(ram::RAMMatrices; order::Symbol = :rows)
     obs_inds = Vector{Int}(undef, nobserved_vars(ram))
+    nobs = 0
     @inbounds for i in 1:nvars(ram)
         colptr = ram.F.colptr[i]
         if ram.F.colptr[i+1] > colptr # is observed
+            nobs += 1
+            obs_inds[order == :rows ?
ram.F.rowval[colptr] : nobs] = i end end return obs_inds From e1d875b4ea48d940643449c057f6fa5d0ee3f86e Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Mon, 11 May 2026 18:27:36 -0700 Subject: [PATCH 15/41] move sparse mtx utils to new file use sparse utils in RAMMatrices --- src/StructuralEquationModels.jl | 1 + src/additional_functions/sparse_utils.jl | 83 +++++++++++++++++++++++ src/frontend/specification/RAMMatrices.jl | 33 ++------- 3 files changed, 88 insertions(+), 29 deletions(-) create mode 100644 src/additional_functions/sparse_utils.jl diff --git a/src/StructuralEquationModels.jl b/src/StructuralEquationModels.jl index 5c3b6030..cb46ce19 100644 --- a/src/StructuralEquationModels.jl +++ b/src/StructuralEquationModels.jl @@ -32,6 +32,7 @@ include("objective_gradient_hessian.jl") # helper objects and functions include("additional_functions/commutation_matrix.jl") +include("additional_functions/sparse_utils.jl") include("additional_functions/params_array.jl") # fitted objects diff --git a/src/additional_functions/sparse_utils.jl b/src/additional_functions/sparse_utils.jl new file mode 100644 index 00000000..76381c47 --- /dev/null +++ b/src/additional_functions/sparse_utils.jl @@ -0,0 +1,83 @@ +# generate sparse matrix with 1 in each row +function eachrow_to_col( + ::Type{T}, + column_indices::AbstractVector{Int}, + ncolumns::Integer, +) where {T} + nrows = length(column_indices) + (nrows > ncolumns) && throw( + DimensionMismatch( + "The number of rows ($nrows) cannot exceed the number of columns ($ncolumns)", + ), + ) + all(i -> 1 <= i <= ncolumns, column_indices) || + throw(ArgumentError("All column indices must be between 1 and $ncolumns")) + + sparse(eachindex(column_indices), column_indices, ones(T, nrows), nrows, ncolumns) +end + +eachrow_to_col(column_indices::AbstractVector{Int}, ncolumns::Integer) = + eachrow_to_col(Float64, column_indices, ncolumns) + +# generate sparse matrix with 1 in each column +function eachcol_to_row( + ::Type{T}, + row_indices::AbstractVector{Int}, + nrows::Integer, +) where {T} + ncols = length(row_indices) + (ncols > nrows) && throw( + DimensionMismatch( + "The number of columns ($ncols) cannot exceed the number of rows ($nrows)", + ), + ) + all(i -> 1 <= i <= nrows, row_indices) || + throw(ArgumentError("All row indices must be between 1 and $nrows")) + + sparse(row_indices, eachindex(row_indices), ones(T, ncols), nrows, ncols) +end + +eachcol_to_row(row_indices::AbstractVector{Int}, nrows::Integer) = + eachcol_to_row(Float64, row_indices, nrows) + +# non-zero indices of columns in matrix A generated by eachrow_to_col() +# if order == :rows, then the indices are in the order of the rows, +# if order == :columns, the indices are in the order of the columns +function nzcols_eachrow_to_col(F, A::SparseMatrixCSC; order::Symbol = :rows) + order ∈ [:rows, :columns] || throw(ArgumentError("order must be :rows or :columns")) + T = typeof(F(1)) + res = Vector{T}(undef, size(A, 1)) + n = 0 + for i in 1:size(A, 2) + colptr = A.colptr[i] + next_colptr = A.colptr[i+1] + if next_colptr > colptr # non-zero + @assert next_colptr - colptr == 1 + n += 1 + res[order == :rows ? 
A.rowval[colptr] : n] = F(i)
+        end
+    end
+    @assert n == size(A, 1)
+    return res
+end
+
+nzcols_eachrow_to_col(A::SparseMatrixCSC; order::Symbol = :rows) =
+    nzcols_eachrow_to_col(identity, A, order = order)
+
+# same as nzcols_eachrow_to_col()
+# but without assumption that each row corresponds to exactly one column
+# the order is always columns order
+function nzcols(F, A::SparseMatrixCSC)
+    T = typeof(F(1))
+    res = Vector{T}()
+    for i in 1:size(A, 2)
+        colptr = A.colptr[i]
+        next_colptr = A.colptr[i+1]
+        if next_colptr > colptr # non-zero
+            push!(res, F(i))
+        end
+    end
+    return res
+end
+
+nzcols(A::SparseMatrixCSC) = nzcols(identity, A)
diff --git a/src/frontend/specification/RAMMatrices.jl b/src/frontend/specification/RAMMatrices.jl
index e20f83d5..7997cc32 100644
--- a/src/frontend/specification/RAMMatrices.jl
+++ b/src/frontend/specification/RAMMatrices.jl
@@ -24,18 +24,8 @@ islatent_var(ram::RAMMatrices, i::Integer) = ram.F.colptr[i+1] == ram.F.colptr[i
 
 # indices of observed variables, for order=:rows (default), the order is as they appear in ram.F rows
 # if order=:columns, the order is as they appear in the combined variables list (ram.F columns)
-function observed_var_indices(ram::RAMMatrices; order::Symbol = :rows)
-    obs_inds = Vector{Int}(undef, nobserved_vars(ram))
-    nobs = 0
-    @inbounds for i in 1:nvars(ram)
-        colptr = ram.F.colptr[i]
-        if ram.F.colptr[i+1] > colptr # is observed
-            nobs += 1
-            obs_inds[order == :rows ? ram.F.rowval[colptr] : nobs] = i
-        end
-    end
-    return obs_inds
-end
+observed_var_indices(ram::RAMMatrices; order::Symbol = :rows) =
+    nzcols_eachrow_to_col(ram.F; order)
 
 latent_var_indices(ram::RAMMatrices) = [i for i in axes(ram.F, 2) if islatent_var(ram, i)]
 
@@ -48,16 +38,7 @@ function observed_vars(ram::RAMMatrices; order::Symbol = :rows)
         @warn "Your RAMMatrices do not contain variable names. Please make sure the order of variables in your data is correct!"
         return nothing
     else
-        nobs = 0
-        obs_vars = Vector{Symbol}(undef, nobserved_vars(ram))
-        @inbounds for (i, v) in enumerate(vars(ram))
-            colptr = ram.F.colptr[i]
-            if ram.F.colptr[i+1] > colptr # is observed
-                nobs += 1
-                obs_vars[order == :rows ? ram.F.rowval[colptr] : nobs] = v
-            end
-        end
-        return obs_vars
+        return nzcols_eachrow_to_col(Base.Fix1(getindex, vars(ram)), ram.F; order = order)
     end
 end
 
@@ -240,13 +221,7 @@ function RAMMatrices(
     return RAMMatrices(
         ParamsMatrix{T}(A_inds, A_consts, (n_vars, n_vars)),
         ParamsMatrix{T}(S_inds, S_consts, (n_vars, n_vars)),
-        sparse(
-            1:n_observed,
-            [vars_index[var] for var in partable.observed_vars],
-            ones(T, n_observed),
-            n_observed,
-            n_vars,
-        ),
+        eachrow_to_col(T, [vars_index[var] for var in partable.observed_vars], n_vars),
         !isnothing(M_inds) ? ParamsVector{T}(M_inds, M_consts, (n_vars,)) : nothing,
         param_labels,
         vars_sorted,

From 3f8dc7ce3f92405429e1aa88642fc0159d077293 Mon Sep 17 00:00:00 2001
From: Alexey Stukalov
Date: Mon, 11 May 2026 18:27:36 -0700
Subject: [PATCH 16/41] reorder_observed_vars!(spec) method

---
 src/frontend/specification/ParameterTable.jl | 12 ++++++++++++
 src/frontend/specification/RAMMatrices.jl    | 13 +++++++++++++
 2 files changed, 25 insertions(+)

diff --git a/src/frontend/specification/ParameterTable.jl b/src/frontend/specification/ParameterTable.jl
index 739ad571..a29deae7 100644
--- a/src/frontend/specification/ParameterTable.jl
+++ b/src/frontend/specification/ParameterTable.jl
@@ -247,6 +247,18 @@ See [sort_vars!](@ref) for in-place version.
""" sort_vars(partable::ParameterTable) = sort_vars!(deepcopy(partable)) +function reorder_observed_vars!(partable::ParameterTable, new_order::AbstractVector{Symbol}) + # just check that it's 1-to-1 + source_to_dest_perm( + partable.observed_vars, + new_order, + one_to_one = true, + entities = "observed_vars", + ) + copy!(partable.observed_vars, new_order) + return partable +end + # add a row -------------------------------------------------------------------------------- function Base.push!(partable::ParameterTable, d::Union{AbstractDict{Symbol}, NamedTuple}) diff --git a/src/frontend/specification/RAMMatrices.jl b/src/frontend/specification/RAMMatrices.jl index 7997cc32..023e98b9 100644 --- a/src/frontend/specification/RAMMatrices.jl +++ b/src/frontend/specification/RAMMatrices.jl @@ -234,6 +234,19 @@ Base.convert( param_labels::Union{AbstractVector{Symbol}, Nothing} = nothing, ) = RAMMatrices(partable; param_labels) +# reorders the observed variables in the RAMMatrices, i.e. the order of the rows in F +function reorder_observed_vars!(ram::RAMMatrices, new_order::AbstractVector{Symbol}) + # just check that it's 1-to-1 + src2dest = source_to_dest_perm( + observed_vars(ram), + new_order, + one_to_one = true, + entities = "observed_vars", + ) + copy!(ram.F, ram.F[src2dest, :]) + return ram +end + ############################################################################################ ### get parameter table from RAMMatrices ############################################################################################ From e3e4a4512eeeff635fabea8276504f1b3abdc4af Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Mon, 11 May 2026 18:27:36 -0700 Subject: [PATCH 17/41] vech() and vechinds() functions --- src/additional_functions/helper.jl | 33 ++++++++++++++++++++++++++++++ src/implied/RAM/symbolic.jl | 5 +---- src/loss/WLS/WLS.jl | 3 +-- 3 files changed, 35 insertions(+), 6 deletions(-) diff --git a/src/additional_functions/helper.jl b/src/additional_functions/helper.jl index 5442357f..d7c7d810 100644 --- a/src/additional_functions/helper.jl +++ b/src/additional_functions/helper.jl @@ -69,6 +69,39 @@ function elimination_matrix(n::Integer) return L end +# vector of lower-triangular values of a square matrix +function vech(A::AbstractMatrix{T}) where {T} + size(A, 1) == size(A, 2) || + throw(ArgumentError("Matrix must be square, $(size(A)) given")) + n = size(A, 1) + v = Vector{T}(undef, (n * (n + 1)) >> 1) + k = 0 + for (j, Aj) in enumerate(eachcol(A)), i in j:n + @inbounds v[k+=1] = Aj[i] + end + @assert k == length(v) + return v +end + +# vector of lower-triangular linear indices of a nXn square matrix +function vechinds(n::Integer) + A_lininds = LinearIndices((n, n)) + v = Vector{Int}(undef, (n * (n + 1)) >> 1) + k = 0 + for j in 1:n, i in j:n + @inbounds v[k+=1] = A_lininds[i, j] + end + @assert k == length(v) + return v +end + +# vector of lower-triangular linear indices of a square matrix +function vechinds(A::AbstractMatrix) + size(A, 1) == size(A, 2) || + throw(ArgumentError("Matrix must be square, $(size(A)) given")) + return vechinds(size(A, 1)) +end + # truncate eigenvalues of a symmetric matrix and return the result function trunc_eigvals( mtx::AbstractMatrix{T}, diff --git a/src/implied/RAM/symbolic.jl b/src/implied/RAM/symbolic.jl index 52a192e6..647b5b41 100644 --- a/src/implied/RAM/symbolic.jl +++ b/src/implied/RAM/symbolic.jl @@ -198,10 +198,7 @@ end function eval_Σ_symbolic(S, I_A⁻¹, F; vech::Bool = false, simplify::Bool = false) Σ = F * I_A⁻¹ * S * permutedims(I_A⁻¹) * 
permutedims(F) Σ = Array(Σ) - if vech - n = size(Σ, 1) - Σ = [Σ[i, j] for j in 1:n for i in j:n] - end + vech && (Σ = SEM.vech(Σ)) if simplify Threads.@threads for i in eachindex(Σ) Σ[i] = Symbolics.simplify(Σ[i]) diff --git a/src/loss/WLS/WLS.jl b/src/loss/WLS/WLS.jl index d067e346..2ccef15b 100644 --- a/src/loss/WLS/WLS.jl +++ b/src/loss/WLS/WLS.jl @@ -95,8 +95,7 @@ function SemWLS( check_observed_vars(observed, implied) nobs_vars = nobserved_vars(observed) - tril_ind = filter(x -> (x[1] >= x[2]), CartesianIndices(obs_cov(observed))) - s = obs_cov(observed)[tril_ind] + s = vech(obs_cov(observed)) size(s) == size(implied.Σ) || throw( DimensionMismatch( "SemWLS requires implied covariance to be in vech-ed form " * From 3fd6a2abd89bba56fbb5a69afd21857d772e2f38 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Mon, 11 May 2026 18:27:36 -0700 Subject: [PATCH 18/41] RAMMatrices(): ctor to replace params --- src/frontend/specification/RAMMatrices.jl | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/frontend/specification/RAMMatrices.jl b/src/frontend/specification/RAMMatrices.jl index 023e98b9..357ab408 100644 --- a/src/frontend/specification/RAMMatrices.jl +++ b/src/frontend/specification/RAMMatrices.jl @@ -113,6 +113,17 @@ function RAMMatrices(; return RAMMatrices(A, S, F, M, copy(param_labels), vars) end +# copy RAMMatrices replacing the parameters vector +# (e.g. when reordering parameters or adding new parameters to the ensemble model) +RAMMatrices(ram::RAMMatrices; params::AbstractVector{Symbol}) = RAMMatrices(; + A = materialize(ram.A, SEM.params(ram)), + S = materialize(ram.S, SEM.params(ram)), + F = copy(ram.F), + M = !isnothing(ram.M) ? materialize(ram.M, SEM.params(ram)) : nothing, + params, + vars = ram.vars, +) + ############################################################################################ ### get RAMMatrices from parameter table ############################################################################################ From 3c218607f67827164a92e32e75b77975d91daf6b Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Mon, 11 May 2026 18:27:36 -0700 Subject: [PATCH 19/41] use `@printf` to limit signif digits printed --- Project.toml | 1 + ext/SEMNLOptExt/NLopt.jl | 2 +- ext/SEMNLOptExt/SEMNLOptExt.jl | 2 +- ext/SEMProximalOptExt/ProximalAlgorithms.jl | 2 +- ext/SEMProximalOptExt/SEMProximalOptExt.jl | 1 + src/StructuralEquationModels.jl | 1 + src/frontend/fit/SemFit.jl | 2 +- src/frontend/fit/summary.jl | 3 +-- 8 files changed, 8 insertions(+), 6 deletions(-) diff --git a/Project.toml b/Project.toml index f56b4ed7..cdc2f5a7 100644 --- a/Project.toml +++ b/Project.toml @@ -18,6 +18,7 @@ PackageExtensionCompat = "65ce6f38-6b18-4e1d-a461-8949797d7930" Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d" ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca" +Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" diff --git a/ext/SEMNLOptExt/NLopt.jl b/ext/SEMNLOptExt/NLopt.jl index 87601030..3d430201 100644 --- a/ext/SEMNLOptExt/NLopt.jl +++ b/ext/SEMNLOptExt/NLopt.jl @@ -189,7 +189,7 @@ end function Base.show(io::IO, result::NLoptResult) print(io, "Optimizer status: $(result.result[3]) \n") - print(io, "Minimum: $(round(result.result[1]; digits = 2)) \n") + @printf(io, "Minimum: %.4g\n", result.result[1]) print(io, "Algorithm: $(result.problem.algorithm) \n") 
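# [editor's note] Quick illustration of the `%.4g` conversion adopted in this patch —
# not part of the diff. The `g` conversion picks fixed or scientific notation,
# whichever is more compact, and prints at most four significant digits:
#
#     using Printf
#     @printf("Minimum: %.4g\n", 123456.789)   # -> Minimum: 1.235e+05
#     @printf("Minimum: %.4g\n", 0.012345678)  # -> Minimum: 0.01235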
print(io, "No. evaluations: $(result.problem.numevals) \n") end diff --git a/ext/SEMNLOptExt/SEMNLOptExt.jl b/ext/SEMNLOptExt/SEMNLOptExt.jl index 61c41338..24439459 100644 --- a/ext/SEMNLOptExt/SEMNLOptExt.jl +++ b/ext/SEMNLOptExt/SEMNLOptExt.jl @@ -1,6 +1,6 @@ module SEMNLOptExt -using StructuralEquationModels, NLopt +using StructuralEquationModels, NLopt, Printf SEM = StructuralEquationModels diff --git a/ext/SEMProximalOptExt/ProximalAlgorithms.jl b/ext/SEMProximalOptExt/ProximalAlgorithms.jl index 0937ee04..15c8215f 100644 --- a/ext/SEMProximalOptExt/ProximalAlgorithms.jl +++ b/ext/SEMProximalOptExt/ProximalAlgorithms.jl @@ -99,7 +99,7 @@ function Base.show(io::IO, struct_inst::SemOptimizerProximal) end function Base.show(io::IO, result::ProximalResult) - print(io, "Minimum: $(round(result.minimum; digits = 2)) \n") + @printf(io, "Minimum: %.4g\n", result.minimum) print(io, "No. evaluations: $(result.n_iterations) \n") print(io, "Operator: $(nameof(typeof(result.optimizer.operator_g))) \n") op_h = result.optimizer.operator_h diff --git a/ext/SEMProximalOptExt/SEMProximalOptExt.jl b/ext/SEMProximalOptExt/SEMProximalOptExt.jl index bedf1920..565207f3 100644 --- a/ext/SEMProximalOptExt/SEMProximalOptExt.jl +++ b/ext/SEMProximalOptExt/SEMProximalOptExt.jl @@ -3,6 +3,7 @@ module SEMProximalOptExt using StructuralEquationModels using StructuralEquationModels: print_type_name, print_field_types using ProximalAlgorithms +using Printf export SemOptimizerProximal diff --git a/src/StructuralEquationModels.jl b/src/StructuralEquationModels.jl index cb46ce19..2c7feeb2 100644 --- a/src/StructuralEquationModels.jl +++ b/src/StructuralEquationModels.jl @@ -1,6 +1,7 @@ module StructuralEquationModels using LinearAlgebra, + Printf, Optim, NLSolversBase, Statistics, diff --git a/src/frontend/fit/SemFit.jl b/src/frontend/fit/SemFit.jl index 04de8bad..0a4b911e 100644 --- a/src/frontend/fit/SemFit.jl +++ b/src/frontend/fit/SemFit.jl @@ -37,7 +37,7 @@ function Base.show(io::IO, m::MIME"text/plain", semfit::SemFit) print(io, "\n") show(io, m, semfit.model) print(io, "\n") - #print(io, "Objective value: $(round(semfit.minimum, digits = 4)) \n") + #@printf(io, "Objective value: %.4g\n", semfit.minimum) print(io, "------------- Optimization result ------------- \n") print(io, "\n") print(io, "engine: ") diff --git a/src/frontend/fit/summary.jl b/src/frontend/fit/summary.jl index fe7ea930..3364f7f5 100644 --- a/src/frontend/fit/summary.jl +++ b/src/frontend/fit/summary.jl @@ -69,8 +69,7 @@ function details(sem_fit::SemFit; show_fitmeasures = false, color = :light_cyan, key_length = length(string(k)) print(k) print(repeat(" ", goal_length - key_length)) - print(round(a[k]; digits = 2)) - print("\n") + @printf("%.3g\n", a[k]) end end print("\n") From 2c9d455d04209e9b064a059bc8eba9a1d24e6b16 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Mon, 11 May 2026 18:27:36 -0700 Subject: [PATCH 20/41] ML/FIML: workaround generic_matmul issue --- src/loss/ML/FIML.jl | 12 +++++++----- src/loss/ML/ML.jl | 14 +++++--------- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/src/loss/ML/FIML.jl b/src/loss/ML/FIML.jl index 74d5edfb..0a9f66b6 100644 --- a/src/loss/ML/FIML.jl +++ b/src/loss/ML/FIML.jl @@ -181,16 +181,18 @@ end function ∇F_fiml_outer!(G, JΣ, Jμ, loss::SemFIML) implied = loss.implied + I_A⁻¹ = parent(implied.I_A⁻¹) + F⨉I_A⁻¹ = parent(implied.F⨉I_A⁻¹) + S = parent(implied.S) + Iₙ = sparse(1.0I, size(implied.A)...) 
- P = kron(implied.F⨉I_A⁻¹, implied.F⨉I_A⁻¹) - Q = kron(implied.S * implied.I_A⁻¹', Iₙ) + P = kron(F⨉I_A⁻¹, F⨉I_A⁻¹) + Q = kron(S * I_A⁻¹', Iₙ) Q .+= loss.commutator * Q ∇Σ = P * (implied.∇S + Q * implied.∇A) - ∇μ = - implied.F⨉I_A⁻¹ * implied.∇M + - kron((implied.I_A⁻¹ * implied.M)', implied.F⨉I_A⁻¹) * implied.∇A + ∇μ = F⨉I_A⁻¹ * implied.∇M + kron((I_A⁻¹ * implied.M)', F⨉I_A⁻¹) * implied.∇A mul!(G, ∇Σ', JΣ) # actually transposed mul!(G, ∇μ', Jμ, -1, 1) diff --git a/src/loss/ML/ML.jl b/src/loss/ML/ML.jl index 5052aa8d..58ecbe9f 100644 --- a/src/loss/ML/ML.jl +++ b/src/loss/ML/ML.jl @@ -209,9 +209,9 @@ function evaluate!(objective, gradient, hessian, loss::SemML, par) end if !isnothing(gradient) - S = implied.S - F⨉I_A⁻¹ = implied.F⨉I_A⁻¹ - I_A⁻¹ = implied.I_A⁻¹ + S = parent(implied.S) + F⨉I_A⁻¹ = parent(implied.F⨉I_A⁻¹) + I_A⁻¹ = parent(implied.I_A⁻¹) ∇A = implied.∇A ∇S = implied.∇S @@ -223,16 +223,12 @@ function evaluate!(objective, gradient, hessian, loss::SemML, par) C = mul!( loss.varXvar_1, F⨉I_A⁻¹', - mul!( - loss.obsXvar_1, - Symmetric(mul!(loss.obsXobs_3, one_Σ⁻¹Σₒ, Σ⁻¹)), - F⨉I_A⁻¹, - ), + mul!(loss.obsXvar_1, mul!(loss.obsXobs_3, one_Σ⁻¹Σₒ, Σ⁻¹), F⨉I_A⁻¹), ) mul!( gradient, ∇A', - vec(mul!(loss.varXvar_3, Symmetric(C), mul!(loss.varXvar_2, S, I_A⁻¹'))), + vec(mul!(loss.varXvar_3, C, mul!(loss.varXvar_2, S, I_A⁻¹'))), 2, 0, ) From c2df5f13048d85f544f5c0fb8f32e5098c358c16 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Mon, 11 May 2026 18:27:36 -0700 Subject: [PATCH 21/41] BlackBoxOptim.jl backend support --- Project.toml | 3 + ext/SEMBlackBoxOptimExt/AdamMutation.jl | 49 +++++ ext/SEMBlackBoxOptimExt/BlackBoxOptim.jl | 89 ++++++++ ext/SEMBlackBoxOptimExt/DiffEvoFactory.jl | 196 ++++++++++++++++++ .../SEMBlackBoxOptimExt.jl | 13 ++ .../SemOptimizerBlackBoxOptim.jl | 91 ++++++++ 6 files changed, 441 insertions(+) create mode 100644 ext/SEMBlackBoxOptimExt/AdamMutation.jl create mode 100644 ext/SEMBlackBoxOptimExt/BlackBoxOptim.jl create mode 100644 ext/SEMBlackBoxOptimExt/DiffEvoFactory.jl create mode 100644 ext/SEMBlackBoxOptimExt/SEMBlackBoxOptimExt.jl create mode 100644 ext/SEMBlackBoxOptimExt/SemOptimizerBlackBoxOptim.jl diff --git a/Project.toml b/Project.toml index cdc2f5a7..d72009b8 100644 --- a/Project.toml +++ b/Project.toml @@ -53,9 +53,12 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" test = ["Test"] [weakdeps] +BlackBoxOptim = "a134a8b2-14d6-55f6-9291-3336d3ab0209" NLopt = "76087f3c-5699-56af-9a33-bf431cd00edd" +Optimisers = "3bd65402-5787-11e9-1adc-39752487f4e2" ProximalAlgorithms = "140ffc9f-1907-541a-a177-7475e0a401e9" [extensions] SEMNLOptExt = "NLopt" SEMProximalOptExt = "ProximalAlgorithms" +SEMBlackBoxOptimExt = ["BlackBoxOptim", "Optimisers"] diff --git a/ext/SEMBlackBoxOptimExt/AdamMutation.jl b/ext/SEMBlackBoxOptimExt/AdamMutation.jl new file mode 100644 index 00000000..4f1a80e3 --- /dev/null +++ b/ext/SEMBlackBoxOptimExt/AdamMutation.jl @@ -0,0 +1,49 @@ +# mutate by moving in the gradient direction +mutable struct AdamMutation{M <: AbstractSem, O, S} <: MutationOperator + model::M + optim::O + opt_state::S + params_fraction::Float64 + + function AdamMutation(model::AbstractSem, params::AbstractDict) + optim = RAdam(params[:AdamMutation_eta], params[:AdamMutation_beta]) + params_fraction = params[:AdamMutation_params_fraction] + opt_state = Optimisers.init(optim, Vector{Float64}(undef, nparams(model))) + + new{typeof(model), typeof(optim), typeof(opt_state)}( + model, + optim, + opt_state, + params_fraction, + ) + end +end + +Base.show(io::IO, 
op::AdamMutation) = + print(io, "AdamMutation(", op.optim, " state[3]=", op.opt_state[3], ")") + +""" +Default parameters for `AdamMutation`. +""" +const AdamMutation_DefaultOptions = ParamsDict( + :AdamMutation_eta => 1E-1, + :AdamMutation_beta => (0.99, 0.999), + :AdamMutation_params_fraction => 0.25, +) + +function BlackBoxOptim.apply!(m::AdamMutation, v::AbstractVector{<:Real}, target_index::Int) + grad = similar(v) + obj = SEM.evaluate!(0.0, grad, nothing, m.model, v) + @inbounds for i in eachindex(grad) + (rand() > m.params_fraction) && (grad[i] = 0.0) + end + + m.opt_state, dv = Optimisers.apply!(m.optim, m.opt_state, v, grad) + if (m.opt_state[3][1] <= 1E-20) || !isfinite(obj) || any(!isfinite, dv) + m.opt_state = Optimisers.init(m.optim, v) + else + v .-= dv + end + + return v +end diff --git a/ext/SEMBlackBoxOptimExt/BlackBoxOptim.jl b/ext/SEMBlackBoxOptimExt/BlackBoxOptim.jl new file mode 100644 index 00000000..f9d67e06 --- /dev/null +++ b/ext/SEMBlackBoxOptimExt/BlackBoxOptim.jl @@ -0,0 +1,89 @@ +############################################################################################ +### connect to BlackBoxOptim.jl as backend +############################################################################################ + +""" +""" +struct SemOptimizerBlackBoxOptim <: SemOptimizer{:BlackBoxOptim} + lower_bound::Float64 # default lower bound + variance_lower_bound::Float64 # default variance lower bound + lower_bounds::Union{Dict{Symbol, Float64}, Nothing} + + upper_bound::Float64 # default upper bound + upper_bounds::Union{Dict{Symbol, Float64}, Nothing} +end + +function SemOptimizerBlackBoxOptim(; + lower_bound::Float64 = -1000.0, + lower_bounds::Union{AbstractDict{Symbol, Float64}, Nothing} = nothing, + variance_lower_bound::Float64 = 0.001, + upper_bound::Float64 = 1000.0, + upper_bounds::Union{AbstractDict{Symbol, Float64}, Nothing} = nothing, + kwargs..., +) + if variance_lower_bound < 0.0 + throw(ArgumentError("variance_lower_bound must be non-negative")) + end + return SemOptimizerBlackBoxOptim( + lower_bound, + variance_lower_bound, + lower_bounds, + upper_bound, + upper_bounds, + ) +end + +SEM.SemOptimizer{:BlackBoxOptim}(args...; kwargs...) = + SemOptimizerBlackBoxOptim(args...; kwargs...) + +SEM.algorithm(optimizer::SemOptimizerBlackBoxOptim) = optimizer.algorithm +SEM.options(optimizer::SemOptimizerBlackBoxOptim) = optimizer.options + +struct SemModelBlackBoxOptimProblem{M <: AbstractSem} <: + OptimizationProblem{ScalarFitnessScheme{true}} + model::M + fitness_scheme::ScalarFitnessScheme{true} + search_space::ContinuousRectSearchSpace +end + +function BlackBoxOptim.search_space(model::AbstractSem) + optim = model.optimizer::SemOptimizerBlackBoxOptim + varparams = Set(SEM.variance_params(model.implied.ram_matrices)) + return ContinuousRectSearchSpace( + [ + begin + def = in(p, varparams) ? optim.variance_lower_bound : optim.lower_bound + isnothing(optim.lower_bounds) ? def : get(optim.lower_bounds, p, def) + end for p in SEM.params(model) + ], + [ + begin + def = optim.upper_bound + isnothing(optim.upper_bounds) ? 
def : get(optim.upper_bounds, p, def) + end for p in SEM.params(model) + ], + ) +end + +function SemModelBlackBoxOptimProblem( + model::AbstractSem, + optimizer::SemOptimizerBlackBoxOptim, +) + SemModelBlackBoxOptimProblem(model, ScalarFitnessScheme{true}(), search_space(model)) +end + +BlackBoxOptim.fitness(params::AbstractVector, wrapper::SemModelBlackBoxOptimProblem) = + return SEM.evaluate!(0.0, nothing, nothing, wrapper.model, params) + +# sem_fit method +function SEM.sem_fit( + optimizer::SemOptimizerBlackBoxOptim, + model::AbstractSem, + start_params::AbstractVector; + MaxSteps::Integer = 50000, + kwargs..., +) + problem = SemModelBlackBoxOptimProblem(model, optimizer) + res = bboptimize(problem; MaxSteps, kwargs...) + return SemFit(best_fitness(res), best_candidate(res), nothing, model, res) +end diff --git a/ext/SEMBlackBoxOptimExt/DiffEvoFactory.jl b/ext/SEMBlackBoxOptimExt/DiffEvoFactory.jl new file mode 100644 index 00000000..75080541 --- /dev/null +++ b/ext/SEMBlackBoxOptimExt/DiffEvoFactory.jl @@ -0,0 +1,196 @@ +""" +Base class for factories of optimizers for a specific problem. +""" +abstract type OptimizerFactory{P <: OptimizationProblem} end + +problem(factory::OptimizerFactory) = factory.problem + +const OptController_DefaultParameters = ParamsDict( + :MaxTime => 60.0, + :MaxSteps => 10^8, + :TraceMode => :compact, + :TraceInterval => 5.0, + :RecoverResults => false, + :SaveTrace => false, +) + +function generate_opt_controller(alg::Optimizer, optim_factory::OptimizerFactory, params) + return BlackBoxOptim.OptController( + alg, + problem(optim_factory), + BlackBoxOptim.chain( + BlackBoxOptim.DefaultParameters, + OptController_DefaultParameters, + params, + ), + ) +end + +function check_population( + factory::OptimizerFactory, + popmatrix::BlackBoxOptim.PopulationMatrix, +) + ssp = factory |> problem |> search_space + for i in 1:popsize(popmatrix) + @assert popmatrix[:, i] ∈ ssp "Individual $i is out of space: $(popmatrix[:,i])" # fitness: $(fitness(population, i))" + end +end + +initial_search_space(factory::OptimizerFactory, id::Int) = search_space(factory.problem) + +function initial_population_matrix(factory::OptimizerFactory, id::Int) + #@info "Standard initial_population_matrix()" + ini_ss = initial_search_space(factory, id) + if !isempty(factory.initial_population) + numdims(factory.initial_population) == numdims(factory.problem) || throw( + DimensionMismatch( + "Dimensions of :Population ($(numdims(factory.initial_population))) " * + "are different from the problem dimensions ($(numdims(factory.problem)))", + ), + ) + res = factory.initial_population[ + :, + StatsBase.sample( + 1:popsize(factory.initial_population), + factory.population_size, + ), + ] + else + res = rand_individuals(ini_ss, factory.population_size, method = :latin_hypercube) + end + prj = RandomBound(ini_ss) + if size(res, 2) > 1 + apply!(prj, view(res, :, 1), SEM.start_fabin3(factory.problem.model)) + end + if size(res, 2) > 2 + apply!(prj, view(res, :, 2), SEM.start_simple(factory.problem.model)) + end + return res +end + +# convert individuals in the archive into population matrix +population_matrix(archive::Any) = population_matrix!( + Matrix{Float64}(undef, length(BlackBoxOptim.params(first(archive))), length(archive)), + archive, +) + +function population_matrix!(pop::AbstractMatrix{<:Real}, archive::Any) + npars = length(BlackBoxOptim.params(first(archive))) + size(pop, 1) == npars || throw( + DimensionMismatch( + "Matrix rows count ($(size(pop, 1))) doesn't match the number of problem 
dimensions ($(npars))", + ), + ) + @inbounds for (i, indi) in enumerate(archive) + (i <= size(pop, 2)) || break + pop[:, i] .= BlackBoxOptim.params(indi) + end + if size(pop, 2) > length(archive) + @warn "Matrix columns count ($(size(pop, 2))) is bigger than population size ($(length(archive))), last columns not set" + end + return pop +end + +generate_embedder(factory::OptimizerFactory, id::Int, problem::OptimizationProblem) = + RandomBound(search_space(problem)) + +abstract type DiffEvoFactory{P <: OptimizationProblem} <: OptimizerFactory{P} end + +generate_selector( + factory::DiffEvoFactory, + id::Int, + problem::OptimizationProblem, + population, +) = RadiusLimitedSelector(get(factory.params, :selector_radius, popsize(population) ÷ 5)) + +function generate_modifier(factory::DiffEvoFactory, id::Int, problem::OptimizationProblem) + ops = GeneticOperator[ + MutationClock(UniformMutation(search_space(problem)), 1 / numdims(problem)), + BlackBoxOptim.AdaptiveDiffEvoRandBin1( + BlackBoxOptim.AdaptiveDiffEvoParameters( + factory.params[:fdistr], + factory.params[:crdistr], + ), + ), + SimplexCrossover{3}(1.05), + SimplexCrossover{2}(1.1), + #SimulatedBinaryCrossover(0.05, 16.0), + #SimulatedBinaryCrossover(0.05, 3.0), + #SimulatedBinaryCrossover(0.1, 5.0), + #SimulatedBinaryCrossover(0.2, 16.0), + UnimodalNormalDistributionCrossover{2}( + chain(BlackBoxOptim.UNDX_DefaultOptions, factory.params), + ), + UnimodalNormalDistributionCrossover{3}( + chain(BlackBoxOptim.UNDX_DefaultOptions, factory.params), + ), + ParentCentricCrossover{2}(chain(BlackBoxOptim.PCX_DefaultOptions, factory.params)), + ParentCentricCrossover{3}(chain(BlackBoxOptim.PCX_DefaultOptions, factory.params)), + ] + if problem isa SemModelBlackBoxOptimProblem + push!( + ops, + AdamMutation(problem.model, chain(AdamMutation_DefaultOptions, factory.params)), + ) + end + FAGeneticOperatorsMixture(ops) +end + +function generate_optimizer( + factory::DiffEvoFactory, + id::Int, + problem::OptimizationProblem, + popmatrix, +) + population = FitPopulation(popmatrix, nafitness(fitness_scheme(problem))) + BlackBoxOptim.DiffEvoOpt( + "AdaptiveDE/rand/1/bin/gradient", + population, + generate_selector(factory, id, problem, population), + generate_modifier(factory, id, problem), + generate_embedder(factory, id, problem), + ) +end + +const Population_DefaultParameters = ParamsDict( + :Population => BlackBoxOptim.PopulationMatrix(undef, 0, 0), + :PopulationSize => 100, +) + +const DE_DefaultParameters = chain( + ParamsDict( + :SelectorRadius => 0, + :fdistr => + BlackBoxOptim.BimodalCauchy(0.65, 0.1, 1.0, 0.1, clampBelow0 = false), + :crdistr => + BlackBoxOptim.BimodalCauchy(0.1, 0.1, 0.95, 0.1, clampBelow0 = false), + ), + Population_DefaultParameters, +) + +struct DefaultDiffEvoFactory{P <: OptimizationProblem} <: DiffEvoFactory{P} + problem::P + initial_population::BlackBoxOptim.PopulationMatrix + population_size::Int + params::ParamsDictChain +end + +DefaultDiffEvoFactory(problem::OptimizationProblem; kwargs...) = + DefaultDiffEvoFactory(problem, BlackBoxOptim.kwargs2dict(kwargs)) + +function DefaultDiffEvoFactory(problem::OptimizationProblem, params::AbstractDict) + params = chain(DE_DefaultParameters, params) + DefaultDiffEvoFactory{typeof(problem)}( + problem, + params[:Population], + params[:PopulationSize], + params, + ) +end + +function BlackBoxOptim.bbsetup(factory::OptimizerFactory; kwargs...) 
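# [editor's note] This override assembles the whole custom setup path: draw the
# initial population (seeded with the fabin3/simple starting values, see
# initial_population_matrix above), validate it against the search space, build the
# adaptive DiffEvo optimizer, and wrap it in an OptController that bboptimize() runs.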
+    popmatrix = initial_population_matrix(factory, 1)
+    check_population(factory, popmatrix)
+    alg = generate_optimizer(factory, 1, problem(factory), popmatrix)
+    return generate_opt_controller(alg, factory, BlackBoxOptim.kwargs2dict(kwargs))
+end
diff --git a/ext/SEMBlackBoxOptimExt/SEMBlackBoxOptimExt.jl b/ext/SEMBlackBoxOptimExt/SEMBlackBoxOptimExt.jl
new file mode 100644
index 00000000..9cbdac4d
--- /dev/null
+++ b/ext/SEMBlackBoxOptimExt/SEMBlackBoxOptimExt.jl
@@ -0,0 +1,13 @@
+module SEMBlackBoxOptimExt
+
+using StructuralEquationModels, BlackBoxOptim, Optimisers
+
+const SEM = StructuralEquationModels
+
+export SemOptimizerBlackBoxOptim
+
+include("AdamMutation.jl")
+include("DiffEvoFactory.jl")
+include("SemOptimizerBlackBoxOptim.jl")
+
+end
diff --git a/ext/SEMBlackBoxOptimExt/SemOptimizerBlackBoxOptim.jl b/ext/SEMBlackBoxOptimExt/SemOptimizerBlackBoxOptim.jl
new file mode 100644
index 00000000..219f409e
--- /dev/null
+++ b/ext/SEMBlackBoxOptimExt/SemOptimizerBlackBoxOptim.jl
@@ -0,0 +1,91 @@
+############################################################################################
+### connect to BlackBoxOptim.jl as backend
+############################################################################################
+
+"""
+    SemOptimizerBlackBoxOptim(;
+        lower_bound = -1000.0, lower_bounds = nothing,
+        variance_lower_bound = 0.001,
+        upper_bound = 1000.0, upper_bounds = nothing,
+        kwargs...)
+
+Global optimization of a SEM with the derivative-free algorithms from *BlackBoxOptim.jl*.
+
+The search space is a box: `lower_bound` and `upper_bound` are the default bounds for
+all parameters, `variance_lower_bound` is the default lower bound for variance
+parameters, and the optional `lower_bounds`/`upper_bounds` dictionaries override the
+defaults for individual parameters.
+"""
+struct SemOptimizerBlackBoxOptim <: SemOptimizer{:BlackBoxOptim}
+    lower_bound::Float64 # default lower bound
+    variance_lower_bound::Float64 # default variance lower bound
+    lower_bounds::Union{Dict{Symbol, Float64}, Nothing}
+
+    upper_bound::Float64 # default upper bound
+    upper_bounds::Union{Dict{Symbol, Float64}, Nothing}
+end
+
+function SemOptimizerBlackBoxOptim(;
+    lower_bound::Float64 = -1000.0,
+    lower_bounds::Union{AbstractDict{Symbol, Float64}, Nothing} = nothing,
+    variance_lower_bound::Float64 = 0.001,
+    upper_bound::Float64 = 1000.0,
+    upper_bounds::Union{AbstractDict{Symbol, Float64}, Nothing} = nothing,
+    kwargs...,
+)
+    if variance_lower_bound < 0.0
+        throw(ArgumentError("variance_lower_bound must be non-negative"))
+    end
+    return SemOptimizerBlackBoxOptim(
+        lower_bound,
+        variance_lower_bound,
+        lower_bounds,
+        upper_bound,
+        upper_bounds,
+    )
+end
+
+SEM.SemOptimizer{:BlackBoxOptim}(args...; kwargs...) =
+    SemOptimizerBlackBoxOptim(args...; kwargs...)
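+
+# Example (hypothetical usage sketch, not part of the tested API; assumes `model`
+# is an `AbstractSem` and that `sem_fit` and `start_simple` are in scope):
+#
+#     optim = SemOptimizerBlackBoxOptim(
+#         variance_lower_bound = 1e-3,
+#         lower_bounds = Dict(:λ₁ => 0.0),
+#     )
+#     fit = sem_fit(optim, model, start_simple(model); MaxSteps = 20_000)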
+ +SEM.algorithm(optimizer::SemOptimizerBlackBoxOptim) = optimizer.algorithm +SEM.options(optimizer::SemOptimizerBlackBoxOptim) = optimizer.options + +struct SemModelBlackBoxOptimProblem{M <: AbstractSem} <: + OptimizationProblem{ScalarFitnessScheme{true}} + model::M + fitness_scheme::ScalarFitnessScheme{true} + search_space::ContinuousRectSearchSpace +end + +function BlackBoxOptim.search_space(model::AbstractSem) + optim = model.optimizer::SemOptimizerBlackBoxOptim + return ContinuousRectSearchSpace( + SEM.lower_bounds( + optim.lower_bounds, + model, + default = optim.lower_bound, + variance_default = optim.variance_lower_bound, + ), + SEM.upper_bounds(optim.upper_bounds, model, default = optim.upper_bound), + ) +end + +function SemModelBlackBoxOptimProblem( + model::AbstractSem, + optimizer::SemOptimizerBlackBoxOptim, +) + SemModelBlackBoxOptimProblem(model, ScalarFitnessScheme{true}(), search_space(model)) +end + +BlackBoxOptim.fitness(params::AbstractVector, wrapper::SemModelBlackBoxOptimProblem) = + return SEM.evaluate!(0.0, nothing, nothing, wrapper.model, params) + +# sem_fit method +function SEM.sem_fit( + optimizer::SemOptimizerBlackBoxOptim, + model::AbstractSem, + start_params::AbstractVector; + Method::Symbol = :adaptive_de_rand_1_bin_with_gradient, + MaxSteps::Integer = 50000, + kwargs..., +) + problem = SemModelBlackBoxOptimProblem(model, optimizer) + if Method == :adaptive_de_rand_1_bin_with_gradient + # custom adaptive differential evolution with mutation that moves along the gradient + bbopt_factory = DefaultDiffEvoFactory(problem; kwargs...) + bbopt = bbsetup(bbopt_factory; MaxSteps, kwargs...) + else + bbopt = bbsetup(problem; Method, MaxSteps, kwargs...) + end + res = bboptimize(bbopt) + return SemFit(best_fitness(res), best_candidate(res), nothing, model, res) +end From 264c38144c9845437ce4e1658e5dc686fc3915b3 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Mon, 11 May 2026 18:27:36 -0700 Subject: [PATCH 22/41] non_posdef_return(v) -> non_posdef_objective(v) and move to abstract.jl --- src/loss/ML/FIML.jl | 2 +- src/loss/ML/ML.jl | 16 ++-------------- src/loss/abstract.jl | 9 +++++++++ 3 files changed, 12 insertions(+), 15 deletions(-) diff --git a/src/loss/ML/FIML.jl b/src/loss/ML/FIML.jl index 0a9f66b6..47e3c1ed 100644 --- a/src/loss/ML/FIML.jl +++ b/src/loss/ML/FIML.jl @@ -154,7 +154,7 @@ function evaluate!(objective, gradient, hessian, loss::SemFIML, params) Σ_chol = cholesky!(Symmetric(loss.imp_inv); check = false) if !isposdef(Σ_chol) - isnothing(objective) || (objective = non_posdef_return(params)) + isnothing(objective) || (objective = non_posdef_objective(params)) isnothing(gradient) || fill!(gradient, 1) return objective end diff --git a/src/loss/ML/ML.jl b/src/loss/ML/ML.jl index 58ecbe9f..8295d73f 100644 --- a/src/loss/ML/ML.jl +++ b/src/loss/ML/ML.jl @@ -121,7 +121,7 @@ function evaluate!( Σ_chol = cholesky!(Symmetric(Σ⁻¹); check = false) if !isposdef(Σ_chol) #@warn "∑⁻¹ is not positive definite" - isnothing(objective) || (objective = non_posdef_return(par)) + isnothing(objective) || (objective = non_posdef_objective(par)) isnothing(gradient) || fill!(gradient, 1) isnothing(hessian) || copyto!(hessian, I) return objective @@ -188,7 +188,7 @@ function evaluate!(objective, gradient, hessian, loss::SemML, par) Σ_chol = cholesky!(Symmetric(Σ⁻¹); check = false) if !isposdef(Σ_chol) #@warn "Σ⁻¹ is not positive definite" - isnothing(objective) || (objective = non_posdef_return(par)) + isnothing(objective) || (objective = non_posdef_objective(par)) 
isnothing(gradient) || fill!(gradient, 1) isnothing(hessian) || copyto!(hessian, I) return objective @@ -256,15 +256,3 @@ function evaluate!(objective, gradient, hessian, loss::SemML, par) return objective end - -############################################################################################ -### additional functions -############################################################################################ - -function non_posdef_return(par) - if eltype(par) <: AbstractFloat - return floatmax(eltype(par)) - else - return typemax(eltype(par)) - end -end diff --git a/src/loss/abstract.jl b/src/loss/abstract.jl index 56a3af58..8456f0ae 100644 --- a/src/loss/abstract.jl +++ b/src/loss/abstract.jl @@ -76,3 +76,12 @@ replace_observed(loss::AbstractLoss, ::Any; kwargs...) = loss # LossTerm: delegate to inner loss replace_observed(term::LossTerm, data; kwargs...) = LossTerm(replace_observed(loss(term), data; kwargs...), id(term), weight(term)) + +# returned objective if the implied Σ(par) matrix is not positive definite +function non_posdef_objective(par::AbstractVector) + if eltype(par) <: AbstractFloat + return floatmax(eltype(par)) + else + return typemax(eltype(par)) + end +end From 4a134052fbafc0c3b9682b567c59ba3ec18a5288 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Mon, 11 May 2026 18:27:36 -0700 Subject: [PATCH 23/41] MeanStruct(ram) --- src/frontend/specification/RAMMatrices.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/frontend/specification/RAMMatrices.jl b/src/frontend/specification/RAMMatrices.jl index 357ab408..35ea1225 100644 --- a/src/frontend/specification/RAMMatrices.jl +++ b/src/frontend/specification/RAMMatrices.jl @@ -12,6 +12,8 @@ struct RAMMatrices <: SemSpecification vars::Union{Vector{Symbol}, Nothing} end +MeanStruct(ram::RAMMatrices) = isnothing(ram.M) ? 
NoMeanStruct() : HasMeanStruct()
+
 nparams(ram::RAMMatrices) = nparams(ram.A)
 nvars(ram::RAMMatrices) = size(ram.F, 2)
 nobserved_vars(ram::RAMMatrices) = size(ram.F, 1)
From 0af2f7c59a11a0ed5a744d8e1f15057bd55c3ec5 Mon Sep 17 00:00:00 2001
From: Alexey Stukalov
Date: Mon, 11 May 2026 18:27:36 -0700
Subject: [PATCH 24/41] SemObserved: fix mean_and_cov() call

it requires the data to be a Matrix
---
 src/observed/data.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/observed/data.jl b/src/observed/data.jl
index 39eebe30..7b8a2baa 100644
--- a/src/observed/data.jl
+++ b/src/observed/data.jl
@@ -40,7 +40,7 @@ function SemObservedData(;
     )
     data, obs_vars, _ = prepare_data(data, observed_vars, specification; observed_var_prefix)
 
-    obs_mean, obs_cov = mean_and_cov(data, 1)
+    obs_mean, obs_cov = mean_and_cov(convert(Matrix, data), 1)
 
     if any(ismissing, data)
         """
From 92fa1cd52eb5f5b8943d7b4f8a80aae3e63e52b6 Mon Sep 17 00:00:00 2001
From: Alexey Stukalov
Date: Mon, 11 May 2026 18:27:36 -0700
Subject: [PATCH 25/41] filter_used_params() not used currently

---
 src/additional_functions/params_array.jl | 27 ++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/src/additional_functions/params_array.jl b/src/additional_functions/params_array.jl
index 8cd89032..bbbd29d7 100644
--- a/src/additional_functions/params_array.jl
+++ b/src/additional_functions/params_array.jl
@@ -265,3 +265,30 @@ function params_range(arr::ParamsArray; allow_gaps::Bool = false)
 
     return first_i:last_i
 end
+
+"""
+    filter_used_params([linearindex_test], arr::ParamsArray)
+
+Collect the parameters that are referenced in `arr` and have at least one
+occurrence whose linear index passes the optional `linearindex_test`.
+
+Returns the indices of the matching parameters.
+"""
+function filter_used_params(linearindex_test, arr::ParamsArray)
+    inds = Vector{Int}()
+    for i in 1:nparams(arr)
+        par_range = SEM.param_occurences_range(arr, i)
+        isempty(par_range) && continue # not relevant
+        @inbounds for j in par_range
+            lin_ind = arr.linear_indices[j]
+            if isnothing(linearindex_test) || linearindex_test(lin_ind)
+                push!(inds, i)
+                break
+            end
+        end
+    end
+    return inds
+end
+
+filter_used_params(arr::ParamsArray) = filter_used_params(nothing, arr)
From c80a0f4b609fd3990590ddfd476031bcce8e8df2 Mon Sep 17 00:00:00 2001
From: Alexey Stukalov
Date: Mon, 11 May 2026 18:27:36 -0700
Subject: [PATCH 26/41] param_indices(spec) method

---
 src/frontend/specification/documentation.jl | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/src/frontend/specification/documentation.jl b/src/frontend/specification/documentation.jl
index a3a8d265..9aa6d932 100644
--- a/src/frontend/specification/documentation.jl
+++ b/src/frontend/specification/documentation.jl
@@ -37,6 +37,16 @@ Return the vector of parameter labels (in the same order as [`params`](@ref)).
 """
 param_labels(spec::SemSpecification) = spec.param_labels
 
+"""
+    param_indices(spec::SemSpecification, params::AbstractVector{Symbol}) -> Vector{Int}
+
+Convert parameter labels to their indices in the SEM specification.
+"""
+function param_indices(spec::SemSpecification, params::AbstractVector{Symbol})
+    param_map = Dict(param => i for (i, param) in enumerate(SEM.params(spec)))
+    return [param_map[param] for param in params]
+end
+
 """
 `ParameterTable`s contain the specification of a structural equation model.
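
For illustration, the lookup that `param_indices` performs can be sketched with a plain `Dict` standing in for a real `SemSpecification` (the parameter names `:a`, `:b`, `:c` are hypothetical):

```julia
# stand-in for SEM.params(spec) of a hypothetical 3-parameter spec
spec_params = [:a, :b, :c]
param_map = Dict(p => i for (i, p) in enumerate(spec_params))
inds = [param_map[p] for p in [:c, :a]]   # == [3, 1]
```
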
From c0313790c57074a83e26685154c6fd98249f16 Mon Sep 17 00:00:00 2001
From: Alexey Stukalov
Date: Mon, 11 May 2026 18:27:36 -0700
Subject: [PATCH 27/41] SemNorm: generalize SemRidge

- allow l^alpha for any alpha
- specific support for alpha=1 (SemLasso) and alpha=2 (SemRidge)
- allow affine transform of parameters before regularization
- SemSpec rather than SemImplied in ctor
- convert calculation to evaluate!()
---
 src/StructuralEquationModels.jl  |   8 +-
 src/loss/regularization/norm.jl  | 190 +++++++++++++++++++++++++++++++
 src/loss/regularization/ridge.jl |  87 --------------
 3 files changed, 195 insertions(+), 90 deletions(-)
 create mode 100644 src/loss/regularization/norm.jl
 delete mode 100644 src/loss/regularization/ridge.jl

diff --git a/src/StructuralEquationModels.jl b/src/StructuralEquationModels.jl
index 2c7feeb2..0b370f46 100644
--- a/src/StructuralEquationModels.jl
+++ b/src/StructuralEquationModels.jl
@@ -66,9 +66,9 @@ include("implied/empty.jl")
 include("loss/abstract.jl")
 include("loss/ML/ML.jl")
 include("loss/ML/FIML.jl")
-include("loss/regularization/ridge.jl")
 include("loss/WLS/WLS.jl")
 include("loss/constant/constant.jl")
+include("loss/regularization/norm.jl")
 # constructor
 include("frontend/specification/Sem.jl")
 include("frontend/specification/documentation.jl")
@@ -124,9 +124,11 @@ export AbstractSem,
     SemML,
     SemFIML,
     em_mvn,
-    SemRidge,
-    SemConstant,
     SemWLS,
+    SemConstant,
+    SemLasso,
+    SemNorm,
+    SemRidge,
     loss,
     nsem_terms,
     sem_terms,
diff --git a/src/loss/regularization/norm.jl b/src/loss/regularization/norm.jl
new file mode 100644
index 00000000..82b05719
--- /dev/null
+++ b/src/loss/regularization/norm.jl
@@ -0,0 +1,190 @@
+# l^α regularization
+
+############################################################################################
+### Types
+############################################################################################
+"""
+    struct SemNorm{α, TP, TA, TB, TH} <: AbstractLoss{ExactHessian}
+
+Regularization term that provides *Lᵅ* regularization of SEM parameters.
+The term computes ``\\sum_{i=1\\ldots n} \\left| p_i \\right|^{\\alpha}``,
+where *pᵢ*, *i = 1..n*, are the values of the selected SEM parameters.
+For `α = 1` it implements the *LASSO* (`SemLasso` alias type), and for
+`α = 2` it implements the *Ridge* regularization (`SemRidge` alias type).
+The term also allows specifying an optional affine transform (*A × p + b*)
+to apply to the parameters before the regularization.
+
+# Constructors
+    SemNorm([param_inds::AbstractVector], [A::AbstractMatrix], [b::AbstractVector]; α::Real)
+    SemNorm(spec::SemSpecification, params::AbstractVector,
+            [A::AbstractMatrix = nothing],
+            [b::AbstractVector = nothing];
+            α::Real)
+
+# Arguments
+- `spec`: SEM model specification.
+- `params::Vector`: optional IDs (Symbols) or indices of parameters to regularize.
+- `A`: optional transformation matrix that defines how to transform the vector of parameter values
+  before the regularization. If `params` is not specified, the transformation is applied
+  to the entire parameters vector.
+- `b`: optional vector of intercepts to add to the transformed parameters.
+- `α`: regularization parameter, any positive real number is supported + +# Examples +```julia +my_lasso = SemLasso(spec, [:λ₁, :λ₂, :ω₂₃]) +my_trans_ridge = SemRidge(spec, [:λ₁, :λ₂, :ω₂₃], [1.0 1.0 0.0; 0.0 0.0 1.0], [-2.0, 0.0]) +``` +""" +struct SemNorm{α, TP, TA, TB, TH} <: AbstractLoss{ExactHessian} + param_inds::TP # indices of parameters to regularize + A::TA # transformation/subsetting of the parameters + At::TA # Aᵀ + b::TB # optional transformed parameter intercepts + H_inds::Vector{Int} # non-zero linear indices of Hessian + H_vals::TH # non-zero values of Hessian +end + +const SemRidge{TP, TA, TB, TH} = SemNorm{2, TP, TA, TB, TH} +const SemLasso{TP, TA, TB, TH} = SemNorm{1, TP, TA, TB, TH} + +############################################################################ +### Constructors +############################################################################ + +function SemNorm{α}( + param_inds::Union{AbstractVector, Nothing} = nothing, + A::Union{AbstractMatrix, Nothing} = nothing, + b::Union{AbstractVector, Nothing} = nothing, +) where {α} + isnothing(A) || + isnothing(param_inds) || + size(A, 2) == length(param_inds) || + throw( + DimensionMismatch( + "The transformation matrix columns ($(size(A, 2))) should match " * + "the number of parameters to regularize ($(length(param_inds)))", + ), + ) + isnothing(b) || + (isnothing(A) && isnothing(param_inds)) || + (length(b) == (isnothing(A) ? length(param_inds) : size(A, 1))) || + throw( + DimensionMismatch( + "The intercept length ($(length(b))) should match the rows of " * + "the transformation matrix ($(isnothing(A) ? "not specified" : size(A, 1)))" * + " or the number of parameters to regularize ($(isnothing(param_inds) ? "not specified" : length(param_inds)))", + ), + ) + + At = !isnothing(A) ? convert(typeof(A), A') : nothing + H = !isnothing(A) ? α * At * A : nothing # FIXME + if isnothing(H) + H_inds = Vector{Int}() + H_v = nothing + else + H_i, H_j, H_v = findnz(H) + H_indmtx = LinearIndices(H) + H_inds = [H_indmtx[i, j] for (i, j) in zip(H_i, H_j)] + H_v = copy(H_v) + end + return SemNorm{α, typeof(param_inds), typeof(A), typeof(b), typeof(H_v)}( + param_inds, + A, + At, + b, + H_inds, + H_v, + ) +end + +function SemNorm{α}( + spec::SemSpecification, + params::AbstractVector, + A::Union{AbstractMatrix, Nothing} = nothing, + b::Union{AbstractVector, Nothing} = nothing, +) where {α} + param_inds = eltype(params) <: Symbol ? param_indices(spec, params) : params + + isnothing(A) || + size(A, 2) == length(param_inds) || + throw( + DimensionMismatch( + "The transformation matrix columns ($(size(A, 2))) should match " * + "the parameters to regularize ($(length(param_inds)))", + ), + ) + + sel_params_mtx = eachrow_to_col(Float64, param_inds, nparams(spec)) + if !isnothing(A) + if A isa SparseMatrixCSC + # for sparse matrices do parameters selection and multiplication in one step + A = convert(typeof(A), A * sel_params_mtx) + param_inds = nothing + end + else # if no matrix, just use selection matrix + A = sel_params_mtx + param_inds = nothing + end + return SemNorm{α}(param_inds, A, b) +end + +SemNorm(args...; α::Real) = SemNorm{α}(args...) 
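+
+# Example (hypothetical sketch; assumes `spec` is a `SemSpecification` with
+# parameters :λ₁ and :λ₂): an affine transform A turns the term into a
+# fused-lasso-style penalty |λ₁ - λ₂| on the difference of two loadings:
+#
+#     using SparseArrays
+#     A = sparse([1.0 -1.0])
+#     fused = SemNorm(spec, [:λ₁, :λ₂], A; α = 1)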
+ +nparams(f::SemNorm) = size(f.A, 2) + +############################################################################################ +### methods +############################################################################################ + +elnorm(_::Val{α}) where {α} = x -> abs(x)^α +elnorm(_::Val{1}) = abs +elnorm(_::Val{2}) = abs2 + +elnorm(_::SemNorm{α}) where {α} = elnorm(Val(α)) + +# not multiplied by α, handled by mul! +elnormgrad(_::Val{α}) where {α} = x -> abs(x)^(α - 1) * sign(x) +elnormgrad(_::Val{2}) = identity +elnormgrad(_::Val{1}) = sign + +elnormgrad(::SemNorm{α}) where {α} = elnormgrad(Val(α)) + +function evaluate!(objective, gradient, hessian, norm::SemNorm{α}, params) where {α} + if !isnothing(norm.param_inds) + params = params[norm.param_inds] + end + if !isnothing(norm.A) + trf_params = norm.A * params + end + if !isnothing(norm.b) + trf_params .+= norm.b + end + + obj = NaN + isnothing(objective) || (obj = sum(elnorm(norm), trf_params)) + + if !isnothing(gradient) + elgrad_trf_params = elnormgrad(norm).(trf_params) + if !isnothing(norm.param_inds) + mul!(params, norm.At, elgrad_trf_params, α, 0) + fill!(gradient, 0) + @inbounds gradient[norm.param_inds] .= params + else + mul!(gradient, norm.At, elgrad_trf_params, α, 0) + end + end + + if !isnothing(hessian) + fill!(hessian, 0) + if α === 1 + # do nothing, hessian is zero + elseif α === 2 + @inbounds hessian[norm.H_inds] .= norm.H_vals + else + error("Hessian not implemented for α ≠ 1, 2") + # TODO: Implement Hessian for other values of α + end + end + return obj +end diff --git a/src/loss/regularization/ridge.jl b/src/loss/regularization/ridge.jl deleted file mode 100644 index 813aff11..00000000 --- a/src/loss/regularization/ridge.jl +++ /dev/null @@ -1,87 +0,0 @@ -# (Ridge) regularization - -############################################################################################ -### Types -############################################################################################ -""" -Ridge regularization. - -# Constructor - - SemRidge(;α_ridge, which_ridge, nparams, parameter_type = Float64, implied = nothing, kwargs...) - -# Arguments -- `α_ridge`: hyperparameter for penalty term -- `which_ridge::Vector`: Vector of parameter labels (Symbols) or indices that indicate which parameters should be regularized. -- `nparams::Int`: number of parameters of the model -- `implied::SemImplied`: implied part of the model -- `parameter_type`: type of the parameters - -# Examples -```julia -my_ridge = SemRidge(;α_ridge = 0.02, which_ridge = [:λ₁, :λ₂, :ω₂₃], nparams = 30, implied = my_implied) -``` - -# Interfaces -Analytic gradients and hessians are available. 
-""" -struct SemRidge{P, W1, W2, GT, HT} <: AbstractLoss - hessianeval::ExactHessian - α::P - which::W1 - which_H::W2 - - gradient::GT - hessian::HT -end - -############################################################################ -### Constructors -############################################################################ - -function SemRidge(; - α_ridge, - which_ridge, - nparams, - parameter_type = Float64, - implied = nothing, - kwargs..., -) - if eltype(which_ridge) <: Symbol - if isnothing(implied) - throw( - ArgumentError( - "When referring to parameters by label, `implied = ...` has to be specified", - ), - ) - else - par2ind = param_indices(implied) - which_ridge = getindex.(Ref(par2ind), which_ridge) - end - end - which_H = [CartesianIndex(x, x) for x in which_ridge] - return SemRidge( - ExactHessian(), - α_ridge, - which_ridge, - which_H, - zeros(parameter_type, nparams), - zeros(parameter_type, nparams, nparams), - ) -end - -############################################################################################ -### methods -############################################################################################ - -objective(ridge::SemRidge, par) = @views ridge.α * sum(abs2, par[ridge.which]) - -function gradient(ridge::SemRidge, par) - @views ridge.gradient[ridge.which] .= (2 * ridge.α) * par[ridge.which] - return ridge.gradient -end - -function hessian(ridge::SemRidge, par) - @views @. ridge.hessian[ridge.which_H] .= 2 * ridge.α - return ridge.hessian -end From bce471ce6835bc57739ddf98c2d428ddf93720d0 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Mon, 11 May 2026 18:27:36 -0700 Subject: [PATCH 28/41] add SemHinge --- src/StructuralEquationModels.jl | 3 ++ src/loss/regularization/hinge.jl | 80 ++++++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+) create mode 100644 src/loss/regularization/hinge.jl diff --git a/src/StructuralEquationModels.jl b/src/StructuralEquationModels.jl index 0b370f46..c63c7f16 100644 --- a/src/StructuralEquationModels.jl +++ b/src/StructuralEquationModels.jl @@ -68,6 +68,8 @@ include("loss/ML/ML.jl") include("loss/ML/FIML.jl") include("loss/WLS/WLS.jl") include("loss/constant/constant.jl") +# regularization +include("loss/regularization/hinge.jl") include("loss/regularization/norm.jl") # constructor include("frontend/specification/Sem.jl") @@ -126,6 +128,7 @@ export AbstractSem, em_mvn, SemWLS, SemConstant, + SemHinge, SemLasso, SemNorm, SemRidge, diff --git a/src/loss/regularization/hinge.jl b/src/loss/regularization/hinge.jl new file mode 100644 index 00000000..b968068e --- /dev/null +++ b/src/loss/regularization/hinge.jl @@ -0,0 +1,80 @@ +""" + SemHinge{B} <: SemLossFunction{ExactHessian} + +Hinge regularization. + +Implements *hinge* a.k.a *rectified linear unit* (*ReLU*) loss function: +```math +f_{\\alpha, t}(x) = \\begin{cases} 0 & \\text{if}\\ x \\leq t \\\\ + \\alpha (x - t) & \\text{if } x > t. + \\end{cases} +``` +""" +struct SemHinge{B} <: SemLossFunction{ExactHessian} + threshold::Float64 + α::Float64 + param_inds::Vector{Int} # indices of parameters to regularize +end + +""" + SemHinge(spec::SemSpecification; + bound = 'l', threshold = 0.0, α, params) + +# Arguments +- `spec`: SEM model specification +- `threshold`: hyperparameter for penalty term +- `α_hinge`: hyperparameter for penalty term +- `which_hinge::AbstractVector`: Vector of parameter labels (Symbols) + or indices that indicate which parameters should be regularized. 
+
+# Examples
+```julia
+my_hinge = SemHinge(spec; bound = 'u', α = 0.02, params = [:λ₁, :λ₂, :ω₂₃])
+```
+"""
+function SemHinge(
+    spec::SemSpecification;
+    bound::Char = 'l',
+    threshold::Number = 0.0,
+    α::Number,
+    params::AbstractVector,
+)
+    bound ∈ ('l', 'u') ||
+        throw(ArgumentError("bound must be either 'l' or 'u', $bound given"))

+    param_inds = eltype(params) <: Symbol ? param_indices(spec, params) : params
+    return SemHinge{bound}(threshold, α, param_inds)
+end
+
+(hinge::SemHinge{'l'})(val::Number) = max(val - hinge.threshold, 0.0)
+(hinge::SemHinge{'u'})(val::Number) = max(hinge.threshold - val, 0.0)
+
+function evaluate!(
+    objective,
+    gradient,
+    hessian,
+    hinge::SemHinge{B},
+    params,
+) where {B}
+    obj = NaN
+    if !isnothing(objective)
+        @inbounds obj = hinge.α * sum(i -> hinge(params[i]), hinge.param_inds)
+    end
+    if !isnothing(gradient)
+        fill!(gradient, 0)
+        @inbounds for i in hinge.param_inds
+            par = params[i]
+            if B == 'l' && par > hinge.threshold
+                gradient[i] = hinge.α
+            elseif B == 'u' && par < hinge.threshold
+                gradient[i] = -hinge.α
+            end
+        end
+    end
+    if !isnothing(hessian)
+        fill!(hessian, 0)
+    end
+    return obj
+end
From 7e37510186515327ae57359c157a36e714dcf11d Mon Sep 17 00:00:00 2001
From: Alexey Stukalov
Date: Mon, 11 May 2026 18:27:36 -0700
Subject: [PATCH 29/41] add SemSquaredHinge

---
 src/StructuralEquationModels.jl          |  2 +
 src/loss/regularization/squared_hinge.jl | 93 ++++++++++++++++++++++++
 2 files changed, 95 insertions(+)
 create mode 100644 src/loss/regularization/squared_hinge.jl

diff --git a/src/StructuralEquationModels.jl b/src/StructuralEquationModels.jl
index c63c7f16..15437b49 100644
--- a/src/StructuralEquationModels.jl
+++ b/src/StructuralEquationModels.jl
@@ -71,6 +71,7 @@ include("loss/constant/constant.jl")
 # regularization
 include("loss/regularization/hinge.jl")
 include("loss/regularization/norm.jl")
+include("loss/regularization/squared_hinge.jl")
 # constructor
 include("frontend/specification/Sem.jl")
 include("frontend/specification/documentation.jl")
@@ -132,6 +133,7 @@ export AbstractSem,
     SemLasso,
     SemNorm,
     SemRidge,
+    SemSquaredHinge,
     loss,
     nsem_terms,
     sem_terms,
diff --git a/src/loss/regularization/squared_hinge.jl b/src/loss/regularization/squared_hinge.jl
new file mode 100644
index 00000000..c49b897b
--- /dev/null
+++ b/src/loss/regularization/squared_hinge.jl
@@ -0,0 +1,93 @@
+"""
+    SemSquaredHinge{B} <: AbstractLoss{ExactHessian}
+
+Squared hinge regularization.
+
+Implements the *squared hinge* loss function:
+```math
+f_{\\alpha, t}(x) = \\begin{cases} 0 & \\text{if}\\ x \\leq t \\\\
+                                   \\alpha (x - t)^2 & \\text{if } x > t.
+                    \\end{cases}
+```
+"""
+struct SemSquaredHinge{B} <: AbstractLoss{ExactHessian}
+    threshold::Float64
+    α::Float64
+    param_inds::Vector{Int} # indices of parameters to regularize
+    H_diag_inds::Vector{Int} # indices of Hessian diagonal elements to regularize
+end
+
+"""
+    SemSquaredHinge(spec::SemSpecification;
+                    bound = 'l', threshold = 0.0, α, params)
+
+# Arguments
+- `spec`: SEM model specification
+- `bound`: `'l'` penalizes values above `threshold`, `'u'` penalizes values below it
+- `threshold`: the cutoff point `t` of the penalty
+- `α`: the weight of the penalty term
+- `params::AbstractVector`: Vector of parameter labels (Symbols)
+  or indices that indicate which parameters should be regularized.
+
+# Examples
+```julia
+my_sqhinge = SemSquaredHinge(spec; bound = 'u', α = 0.02, params = [:λ₁, :λ₂, :ω₂₃])
+```
+"""
+function SemSquaredHinge(
+    spec::SemSpecification;
+    bound::Char = 'l',
+    threshold::Number = 0.0,
+    α::Number,
+    params::AbstractVector,
+)
+    bound ∈ ('l', 'u') ||
+        throw(ArgumentError("bound must be either 'l' or 'u', $bound given"))

+    param_inds = eltype(params) <: Symbol ? param_indices(spec, params) : params
+    H_linind = LinearIndices((nparams(spec), nparams(spec)))
+    return SemSquaredHinge{bound}(
+        threshold,
+        α,
+        param_inds,
+        [H_linind[i, i] for i in param_inds],
+    )
+end
+
+(sqhinge::SemSquaredHinge{'l'})(val::Number) = abs2(max(val - sqhinge.threshold, 0.0))
+(sqhinge::SemSquaredHinge{'u'})(val::Number) = abs2(max(sqhinge.threshold - val, 0.0))
+
+function evaluate!(
+    objective,
+    gradient,
+    hessian,
+    sqhinge::SemSquaredHinge{B},
+    params,
+) where {B}
+    obj = NaN
+    if !isnothing(objective)
+        @inbounds obj = sqhinge.α * sum(i -> sqhinge(params[i]), sqhinge.param_inds)
+    end
+    if !isnothing(gradient)
+        fill!(gradient, 0)
+        @inbounds for i in sqhinge.param_inds
+            par = params[i]
+            if (B == 'l' && par > sqhinge.threshold) ||
+               (B == 'u' && par < sqhinge.threshold)
+                gradient[i] = 2 * sqhinge.α * (par - sqhinge.threshold)
+            end
+        end
+    end
+    if !isnothing(hessian)
+        fill!(hessian, 0)
+        @inbounds for (par_i, H_i) in zip(sqhinge.param_inds, sqhinge.H_diag_inds)
+            par = params[par_i]
+            if (B == 'l' && par > sqhinge.threshold) ||
+               (B == 'u' && par < sqhinge.threshold)
+                hessian[H_i] = 2 * sqhinge.α
+            end
+        end
+    end
+    return obj
+end
From 1d15191b46cf7edfcda9b1b8307a0b3a051c1791 Mon Sep 17 00:00:00 2001
From: Alexey Stukalov
Date: Mon, 11 May 2026 18:27:36 -0700
Subject: [PATCH 30/41] quad.jl: optimized methods for X*A*X', X*X' etc

---
 src/StructuralEquationModels.jl  |   1 +
 src/additional_functions/quad.jl | 192 +++++++++++++++++++++++++++++++
 2 files changed, 193 insertions(+)
 create mode 100644 src/additional_functions/quad.jl

diff --git a/src/StructuralEquationModels.jl b/src/StructuralEquationModels.jl
index 15437b49..b8040a86 100644
--- a/src/StructuralEquationModels.jl
+++ b/src/StructuralEquationModels.jl
@@ -35,6 +35,7 @@ include("objective_gradient_hessian.jl")
 include("additional_functions/commutation_matrix.jl")
 include("additional_functions/sparse_utils.jl")
 include("additional_functions/params_array.jl")
+include("additional_functions/quad.jl")
 
 # fitted objects
 include("frontend/fit/SemFit.jl")
diff --git a/src/additional_functions/quad.jl b/src/additional_functions/quad.jl
new file mode 100644
index 00000000..11287607
--- /dev/null
+++ b/src/additional_functions/quad.jl
@@ -0,0 +1,192 @@
+_unwrap_symmetric(res::AbstractMatrix) = res
+_unwrap_symmetric(res::Symmetric) = parent(res)
+
+# faster version of copytri!() that uses blascopy!()
+function blascopytri!(A::StridedMatrix, uplo::AbstractChar)
+    n = LinearAlgebra.checksquare(A)
+    if uplo == 'L'
+        for (i, di) in enumerate(diagind(A))
+            (i < n) || continue
+            BLAS.blascopy!(
+                n - i,
+                pointer(A, di + 1),
+                stride(A, 1),
+                pointer(A, di + size(A, 2)),
+                stride(A, 2),
+            )
+        end
+    elseif uplo == 'U'
+        for (i, di) in enumerate(diagind(A))
+            (i < n) || continue
+            BLAS.blascopy!(
+                n - i,
+                pointer(A, di + size(A, 2)),
+                stride(A, 2),
+                pointer(A, di + 1),
+                stride(A, 1),
+            )
+        end
+    else
+        lazy"uplo argument must be 'U' (upper) or 'L' (lower), got $uplo" |>
+        ArgumentError |>
+        throw
+    end
+    return A
+end
+
+# faster copytri!() that uses @simd, @inbounds and drops elementwise
conjugation +@inline function fastcopytri!(A::AbstractMatrix, uplo::AbstractChar) + n = LinearAlgebra.checksquare(A) + if uplo == 'U' + @inbounds for i in axes(A, 1) + @simd for j in (i+1):n + A[j, i] = A[i, j] + end + end + elseif uplo == 'L' + @inbounds for i in axes(A, 1) + @simd for j in (i+1):n + A[i, j] = A[j, i] + end + end + else + lazy"uplo argument must be 'U' (upper) or 'L' (lower), got $uplo" |> + ArgumentError |> + throw + end + A +end + +# faster version that drops issymmetric checks +# and switches to gemm mode for large matrices +@inline function syrk_wrapper!( + res::AbstractMatrix, + trans::Char, + X::Union{StridedMatrix, StridedVector}, + alpha::Real = 1, + beta::Real = 0; + check::Bool = true, + # big matrices are multiplied in gemm mode to avoid long copytri!() + mode::Symbol = size(res, 1) >= 1000 ? :gemm : :syrk, +) + T = eltype(X) + if mode == :syrk && (iszero(beta) || (!check || issymmetric(res))) + BLAS.syrk!('U', trans, T(alpha), X, T(beta), _unwrap_symmetric(res)) + fastcopytri!(_unwrap_symmetric(res), 'U') + elseif mode == :gemm # generic + LinearAlgebra.gemm_wrapper!( + _unwrap_symmetric(res), + trans, + trans == 'N' ? 'T' : 'N', + X, + X, + LinearAlgebra.MulAddMul(alpha, beta), + ) + else + throw(ArgumentError(lazy"mode must be :syrk or :gemm, $mode given")) + end + return res +end + +# calculate Xᵀ⋅X +Xt_X!(res::AbstractMatrix, X::AbstractMatrix, alpha::Real = 1, beta::Real = 0) = + mul!(_unwrap_symmetric(res), X', X, alpha, beta) + +Xt_X!(res::AbstractMatrix, X::StridedMatrix, alpha::Real = 1, beta::Real = 0; kwargs...) = + syrk_wrapper!(res, 'T', X, alpha, beta; kwargs...) + +X_Xt!( + res::AbstractMatrix, + X::Union{AbstractMatrix, AbstractVector}, + alpha::Real = 1, + beta::Real = 0, +) = mul!(_unwrap_symmetric(res), X, X', alpha, beta) + +X_Xt!( + res::AbstractMatrix, + X::Union{StridedMatrix, StridedVector}, + alpha::Real = 1, + beta::Real = 0; + kwargs..., +) = syrk_wrapper!(res, 'N', X, alpha, beta; kwargs...) + +Xt_X(X::AbstractMatrix) = Xt_X!(Matrix{eltype(X)}(undef, size(X, 2), size(X, 2)), X) + +X_Xt(X::Union{AbstractMatrix, AbstractVector}) = + X_Xt!(Matrix{eltype(X)}(undef, size(X, 1), size(X, 1)), X) + +# calculate Xᵀ⋅A⋅X +# FIXME: use PDMats.jl when its sparse matrix support is refactored +# see https://github.com/JuliaStats/PDMats.jl/pull/188 +function Xt_A_X!( + res::AbstractMatrix, + A::AbstractMatrix, + X::AbstractMatrix, + alpha::Real = 1, + beta::Real = 0; + Xt_A_buf::Union{AbstractMatrix, Nothing} = nothing, +) + Xt_A = !isnothing(Xt_A_buf) ? mul!(Xt_A_buf, X', A) : X'A + return mul!(_unwrap_symmetric(res), Xt_A, X, alpha, beta) +end + +# special handling of symmetric to make sure it is the first argument in * +function Xt_A_X!( + res::AbstractMatrix, + A::Symmetric{<:Any, M}, + X::AbstractMatrix, + alpha::Real = 1, + beta::Real = 0; + Xt_A_buf::Union{AbstractMatrix, Nothing} = nothing, +) where {M <: StridedMatrix} + A_X = !isnothing(Xt_A_buf) ? mul!(reshape(Xt_A_buf, size(X)), A, X) : A*X + return mul!(_unwrap_symmetric(res), X', A_X, alpha, beta) +end + +Xt_A_X(A::AbstractMatrix, X::AbstractMatrix, alpha::Real = 1; kwargs...) = Xt_A_X!( + Matrix{promote_type(eltype(A), eltype(X))}(undef, size(X, 2), size(X, 2)), + A, + X, + alpha, + 0; + kwargs..., +) + +function X_A_Xt!( + res::AbstractMatrix, + A::AbstractMatrix, + X::AbstractMatrix, + alpha::Real = 1, + beta::Real = 0; + X_A_buf::Union{AbstractMatrix, Nothing} = nothing, +) + X_A = !isnothing(X_A_buf) ? 
mul!(X_A_buf, X, A) : X * A + return mul!(_unwrap_symmetric(res), X_A, X', alpha, beta) +end + +# special handling of symmetric to make sure it is the first argument in * +function X_A_Xt!( + res::AbstractMatrix, + A::Symmetric{<:Any, M}, + X::AbstractMatrix, + alpha::Real = 1, + beta::Real = 0; + X_A_buf::Union{AbstractMatrix, Nothing} = nothing, +) where {M <: StridedMatrix} + # FIXME in principle no need to unwrap A, but with symmetric A and transposed X + # julia's generic_matmatmul() falls back into non-BLAS implementation (looks like Julia's bug) + A_Xt = + !isnothing(X_A_buf) ? + mul!(reshape(X_A_buf, size(X, 2), size(X, 1)), _unwrap_symmetric(A), X') : + _unwrap_symmetric(A) * X' + return mul!(_unwrap_symmetric(res), X, A_Xt, alpha, beta) +end + +X_A_Xt(A::AbstractMatrix, X::AbstractMatrix, alpha::Real = 1; kwargs...) = X_A_Xt!( + Matrix{promote_type(eltype(A), eltype(X))}(undef, size(X, 1), size(X, 1)), + A, + X, + alpha, + 0; + kwargs..., +) From e3cd308acebd4bb949ef460a34bb959de336cb06 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Mon, 11 May 2026 18:27:36 -0700 Subject: [PATCH 31/41] trunc_eigvals(): use X_A_Xt() --- src/additional_functions/helper.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/additional_functions/helper.jl b/src/additional_functions/helper.jl index d7c7d810..696577f2 100644 --- a/src/additional_functions/helper.jl +++ b/src/additional_functions/helper.jl @@ -122,7 +122,7 @@ function trunc_eigvals( if eigmin < min_eigval # substitute small eigvals with min_eigval eigvals_mtx = Diagonal(max.(mtx_eig.values, min_eigval)) - newmtx = mtx_eig.vectors * eigvals_mtx * mtx_eig.vectors' + newmtx = X_A_Xt(eigvals_mtx, mtx_eig.vectors) StatsBase._symmetrize!(newmtx) if verbose Δmtx = newmtx .- mtx From 10a41abe62eb820c6d00f504601be187284131da Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Mon, 11 May 2026 18:27:36 -0700 Subject: [PATCH 32/41] predict_latent_scores() --- src/StructuralEquationModels.jl | 1 + src/frontend/predict.jl | 142 ++++++++++++++++++++++++++++++++ 2 files changed, 143 insertions(+) create mode 100644 src/frontend/predict.jl diff --git a/src/StructuralEquationModels.jl b/src/StructuralEquationModels.jl index b8040a86..6f27f757 100644 --- a/src/StructuralEquationModels.jl +++ b/src/StructuralEquationModels.jl @@ -49,6 +49,7 @@ include("frontend/specification/StenoGraphs.jl") include("frontend/fit/summary.jl") include("frontend/StatsAPI.jl") include("frontend/finite_diff.jl") +include("frontend/predict.jl") # pretty printing include("frontend/pretty_printing.jl") # observed diff --git a/src/frontend/predict.jl b/src/frontend/predict.jl new file mode 100644 index 00000000..f7aa66f6 --- /dev/null +++ b/src/frontend/predict.jl @@ -0,0 +1,142 @@ +abstract type SemScoresPredictMethod end + +struct SemRegressionScores <: SemScoresPredictMethod end +struct SemBartlettScores <: SemScoresPredictMethod end +struct SemAndersonRubinScores <: SemScoresPredictMethod end + +function SemScoresPredictMethod(method::Symbol) + if method == :regression + return SemRegressionScores() + elseif method == :Bartlett + return SemBartlettScores() + elseif method == :AndersonRubin + return SemAndersonRubinScores() + else + throw(ArgumentError("Unsupported prediction method: $method")) + end +end + +predict_latent_scores( + fit::SemFit, + data::SemObserved = observed(sem_term(fit.model)); + method::Symbol = :regression, +) = predict_latent_scores(SemScoresPredictMethod(method), fit, data) + +predict_latent_scores( + 
method::SemScoresPredictMethod, + fit::SemFit, + data::SemObserved = observed(sem_term(fit.model)), +) = predict_latent_scores(method, loss(sem_term(fit.model)), fit.solution, data) + +function inv_cov!(A::AbstractMatrix) + if istril(A) + A = LowerTriangular(A) + elseif istriu(A) + A = UpperTriangular(A) + else + end + A_chol = Cholesky(A) + return inv!(A_chol) +end + +# wrapper that materializes A and S matrices from the model params +function latent_scores_operator( + ::Type{T}, + model::SemLoss, + params::AbstractVector; + kwargs..., +) where {T <: SemScoresPredictMethod} + ram = SEM.implied(model).ram_matrices + latent_scores_operator( + T, + SEM.implied(model), + materialize(ram.A, params), + materialize(ram.S, params); + kwargs..., + ) +end + +function latent_scores_operator( + ::SemRegressionScores, + implied::SemImplied, + A::AbstractMatrix, + S::AbstractMatrix, +) + implied = SEM.implied(model) + ram = implied.ram_matrices + lv_inds = latent_var_indices(ram) + + lv_FA = ram.F * A[:, lv_inds] + lv_I_A⁻¹ = inv(I - A)[lv_inds, :] + + cov_lv = X_A_Xt(S, lv_I_A⁻¹) + Σ = implied.Σ + Σ⁻¹ = inv(Σ) + return cov_lv * lv_FA' * Σ⁻¹ +end + +function latent_scores_operator( + ::SemBartlettScores, + implied::SemImplied, + A::AbstractMatrix, + S::AbstractMatrix, +) + ram = implied.ram_matrices + lv_inds = latent_var_indices(ram) + lv_FA = ram.F * A[:, lv_inds] + + obs_inds = observed_var_indices(ram) + ov_S⁻¹ = inv(S[obs_inds, obs_inds]) + cov_lv⁻¹ = Xt_A_X(ov_S⁻¹, lv_FA) + cov_lv = inv(cov_lv⁻¹) + + return cov_lv * lv_FA' * ov_S⁻¹ +end + +function predict_latent_scores( + method::SemScoresPredictMethod, + model::SemLoss, + params::AbstractVector, + data::SemObserved = observed(model), +) + nobserved_vars(data) == nobserved_vars(model) || throw( + DimensionMismatch( + "Number of variables in data ($(nsamples(data))) does not match the number of observed variables in the model ($(nobserved_vars(model)))", + ), + ) + length(params) == nparams(model) || throw( + DimensionMismatch( + "The length of parameters vector ($(length(params))) does not match the number of parameters in the model ($(nparams(model)))", + ), + ) + + implied = SEM.implied(model) + ram = implied.ram_matrices + update!(EvaluationTargets(0.0, nothing, nothing), implied, params) + ram = implied.ram_matrices + lv_inds = latent_var_indices(ram) + + A = materialize(ram.A, params) + S = materialize(ram.S, params) + lv_inds = latent_var_indices(ram) + lv_scores_op = latent_scores_operator(method, implied, A, S) + + data = + data.data .- (isnothing(data.obs_mean) ? 
mean(data.data, dims = 1) : data.obs_mean') + lv_scores = data * lv_scores_op' + # adjust the scores w.r.t the variable means + if MeanStruct(implied) === HasMeanStruct + M = materialize(ram.M, params) + lv_I_A⁻¹ = inv(I - A)[lv_inds, :] + lv_scores .+= (lv_I_A⁻¹ * M)' + end + + return lv_scores +end + +predict_latent_scores( + model::SemLoss, + params::AbstractVector, + data::SemObserved = observed(model); + method::Symbol = :regression, +) = predict_latent_scores(SemScoresPredictMethod(method), model, params, data) From fad3a1b23051bf06d86b4467d60402196d017f01 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Mon, 11 May 2026 18:27:36 -0700 Subject: [PATCH 33/41] predict: add alpha regularization --- src/frontend/predict.jl | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/src/frontend/predict.jl b/src/frontend/predict.jl index f7aa66f6..ee55b953 100644 --- a/src/frontend/predict.jl +++ b/src/frontend/predict.jl @@ -60,16 +60,21 @@ function latent_scores_operator( ::SemRegressionScores, implied::SemImplied, A::AbstractMatrix, - S::AbstractMatrix, + S::AbstractMatrix; + alpha::Number = 0, ) implied = SEM.implied(model) ram = implied.ram_matrices lv_inds = latent_var_indices(ram) lv_FA = ram.F * A[:, lv_inds] - lv_I_A⁻¹ = inv(I - A)[lv_inds, :] - cov_lv = X_A_Xt(S, lv_I_A⁻¹) + cov_lv = if alpha == 0 + lv_I_A⁻¹ = inv(I - A)[lv_inds, :] + X_A_Xt(S, lv_I_A⁻¹) + else + inv(Xt_A_X(inv(S), I - A) + alpha * I)[lv_inds, lv_inds] + end Σ = implied.Σ Σ⁻¹ = inv(Σ) return cov_lv * lv_FA' * Σ⁻¹ @@ -79,7 +84,8 @@ function latent_scores_operator( ::SemBartlettScores, implied::SemImplied, A::AbstractMatrix, - S::AbstractMatrix, + S::AbstractMatrix; + alpha::Number = 0, ) ram = implied.ram_matrices lv_inds = latent_var_indices(ram) @@ -88,6 +94,7 @@ function latent_scores_operator( obs_inds = observed_var_indices(ram) ov_S⁻¹ = inv(S[obs_inds, obs_inds]) cov_lv⁻¹ = Xt_A_X(ov_S⁻¹, lv_FA) + (alpha != 0) && (cov_lv⁻¹ += alpha * I) cov_lv = inv(cov_lv⁻¹) return cov_lv * lv_FA' * ov_S⁻¹ @@ -97,7 +104,8 @@ function predict_latent_scores( method::SemScoresPredictMethod, model::SemLoss, params::AbstractVector, - data::SemObserved = observed(model), + data::SemObserved = observed(model); + alpha::Number = 0, ) nobserved_vars(data) == nobserved_vars(model) || throw( DimensionMismatch( @@ -109,17 +117,17 @@ function predict_latent_scores( "The length of parameters vector ($(length(params))) does not match the number of parameters in the model ($(nparams(model)))", ), ) + (alpha < 0) && + throw(ArgumentError("The regularization parameter alpha must be non-negative")) implied = SEM.implied(model) ram = implied.ram_matrices update!(EvaluationTargets(0.0, nothing, nothing), implied, params) - ram = implied.ram_matrices - lv_inds = latent_var_indices(ram) A = materialize(ram.A, params) S = materialize(ram.S, params) lv_inds = latent_var_indices(ram) - lv_scores_op = latent_scores_operator(method, implied, A, S) + lv_scores_op = latent_scores_operator(method, implied, A, S; alpha) data = data.data .- (isnothing(data.obs_mean) ? 
mean(data.data, dims = 1) : data.obs_mean') @@ -139,4 +147,5 @@ predict_latent_scores( params::AbstractVector, data::SemObserved = observed(model); method::Symbol = :regression, -) = predict_latent_scores(SemScoresPredictMethod(method), model, params, data) + alpha::Number = 0, +) = predict_latent_scores(SemScoresPredictMethod(method), model, params, data; alpha) From 7c75f8446ae7796a699208da24b10735bac4f4c5 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Mon, 11 May 2026 18:27:36 -0700 Subject: [PATCH 34/41] predict(model, scores) by default predicts observed vars based on the latent var scores --- src/frontend/predict.jl | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/src/frontend/predict.jl b/src/frontend/predict.jl index ee55b953..d736968d 100644 --- a/src/frontend/predict.jl +++ b/src/frontend/predict.jl @@ -1,3 +1,35 @@ +function predict( + model::SemLoss, + params::AbstractVector, + scores::AbstractMatrix; + score_vars::Union{AbstractVector, Nothing} = latent_var_indices(model), + predict_vars::Union{AbstractVector, Nothing} = observed_var_indices(model), +) + ram = imply(model).ram_matrices + score_var_inds = + !isnothing(score_vars) ? check_var_indices(ram, score_vars, normalize = false) : + nothing + A = materialize(ram.A, params) + I_A⁻¹ = inv(I - A) + sv_I_A⁻¹ = !isnothing(score_var_inds) ? I_A⁻¹[:, score_var_inds] : I_A⁻¹ + res = scores * sv_I_A⁻¹' + if MeanStructure(imply(model)) === HasMeanStructure + # score_vars intercepts already included in the scores + M = materialize(ram.M, params) + if isnothing(score_var_inds) + fill!(M, 0) + else + M[score_var_inds, :] .= 0 + end + res .+= (I_A⁻¹ * M)' + end + if !isnothing(predict_vars) + predict_var_inds = check_var_indices(ram, predict_vars, normalize = false) + res = res[:, predict_var_inds] + end + return res +end + abstract type SemScoresPredictMethod end struct SemRegressionScores <: SemScoresPredictMethod end From d766f58942019158c64fbc11c120a268ac53191d Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Mon, 11 May 2026 18:27:36 -0700 Subject: [PATCH 35/41] latent_scores(): allow specifying latent vars subset --- src/frontend/predict.jl | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/src/frontend/predict.jl b/src/frontend/predict.jl index d736968d..d444a3c2 100644 --- a/src/frontend/predict.jl +++ b/src/frontend/predict.jl @@ -75,6 +75,7 @@ end function latent_scores_operator( ::Type{T}, model::SemLoss, + latent_vars::AbstractVector, params::AbstractVector; kwargs..., ) where {T <: SemScoresPredictMethod} @@ -82,6 +83,7 @@ function latent_scores_operator( latent_scores_operator( T, SEM.implied(model), + latent_vars, materialize(ram.A, params), materialize(ram.S, params); kwargs..., @@ -91,21 +93,21 @@ end function latent_scores_operator( ::SemRegressionScores, implied::SemImplied, + latent_vars::AbstractVector, A::AbstractMatrix, S::AbstractMatrix; alpha::Number = 0, ) implied = SEM.implied(model) ram = implied.ram_matrices - lv_inds = latent_var_indices(ram) - lv_FA = ram.F * A[:, lv_inds] + lv_FA = ram.F * A[:, latent_vars] cov_lv = if alpha == 0 - lv_I_A⁻¹ = inv(I - A)[lv_inds, :] + lv_I_A⁻¹ = inv(I - A)[latent_vars, :] X_A_Xt(S, lv_I_A⁻¹) else - inv(Xt_A_X(inv(S), I - A) + alpha * I)[lv_inds, lv_inds] + inv(Xt_A_X(inv(S), I - A) + alpha * I)[latent_vars, latent_vars] end Σ = implied.Σ Σ⁻¹ = inv(Σ) @@ -115,13 +117,13 @@ end function latent_scores_operator( ::SemBartlettScores, implied::SemImplied, + latent_vars::AbstractVector, 
A::AbstractMatrix, S::AbstractMatrix; alpha::Number = 0, ) ram = implied.ram_matrices - lv_inds = latent_var_indices(ram) - lv_FA = ram.F * A[:, lv_inds] + lv_FA = ram.F * A[:, latent_vars] obs_inds = observed_var_indices(ram) ov_S⁻¹ = inv(S[obs_inds, obs_inds]) @@ -137,6 +139,7 @@ function predict_latent_scores( model::SemLoss, params::AbstractVector, data::SemObserved = observed(model); + latent_vars::Union{AbstractVector, Nothing} = nothing, alpha::Number = 0, ) nobserved_vars(data) == nobserved_vars(model) || throw( @@ -154,12 +157,13 @@ function predict_latent_scores( implied = SEM.implied(model) ram = implied.ram_matrices + lv_inds = check_var_indices(ram, latent_vars, allow_observed = false, normalize = true) + update!(EvaluationTargets(0.0, nothing, nothing), implied, params) A = materialize(ram.A, params) S = materialize(ram.S, params) - lv_inds = latent_var_indices(ram) - lv_scores_op = latent_scores_operator(method, implied, A, S; alpha) + lv_scores_op = latent_scores_operator(method, implied, lv_inds, A, S; alpha) data = data.data .- (isnothing(data.obs_mean) ? mean(data.data, dims = 1) : data.obs_mean') @@ -179,5 +183,13 @@ predict_latent_scores( params::AbstractVector, data::SemObserved = observed(model); method::Symbol = :regression, + latent_vars::Union{AbstractVector, Nothing} = nothing, alpha::Number = 0, -) = predict_latent_scores(SemScoresPredictMethod(method), model, params, data; alpha) +) = predict_latent_scores( + SemScoresPredictMethod(method), + model, + params, + data; + latent_vars, + alpha, +) From 34ee98d8a7aefb9af1f610e617e3b9852753253d Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Mon, 11 May 2026 18:27:36 -0700 Subject: [PATCH 36/41] predict: add scoring methods docstrings --- src/frontend/predict.jl | 180 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 180 insertions(+) diff --git a/src/frontend/predict.jl b/src/frontend/predict.jl index d444a3c2..2afaa52f 100644 --- a/src/frontend/predict.jl +++ b/src/frontend/predict.jl @@ -30,10 +30,172 @@ function predict( return res end +""" + SemScoresPredictMethod + +Abstract supertype for latent-score prediction methods used by [`predict_latent_scores`](@ref). + +The subtypes should implement the [`latent_scores_solver()`](@ref) method that creates +an instance of [`ScoresSolver`](@ref). + +*SEM.jl* implements the following methods: + +- [`SemRegressionScores`](@ref) for *Regression/Thomson* scores. +- [`SemBartlettScores`](@ref) for *Bartlett* scores. +- [`SemAndersonRubinScores`](@ref) for *Anderson-Rubin* scores. + +See the concrete type docstrings for the mathematical definitions and interpretation of +each method. +""" abstract type SemScoresPredictMethod end +raw""" + SemRegressionScores + +*Regression/Thomson* latent scores. + +For centered observed data vector `y`, observed loading matrix `Λ`, observed residual +covariance `Ψ`, and marginal covariance `Φ` of the selected latent variables, the +regression scores are + +```math +\hat z_{\mathrm{reg}} += \Phi \Lambda^\top \Sigma^{-1} y, +\qquad +\Sigma = \Lambda \Phi \Lambda^\top + \Psi, +``` + +which is equivalent to the penalized least-squares problem + +```math +\hat z_{\mathrm{reg}} += \arg\min_z \left(\lVert L_\Psi (y - \Lambda z) \rVert_2^2 ++ \lVert L_\Phi z \rVert_2^2 ++ \alpha \lVert z \rVert_2^2 \right), +``` + +where `L_Ψ' L_Ψ = Ψ^{-1}` and `L_Φ' L_Φ = Φ^{-1}`. 
The prior term `\lVert L_\Phi z \rVert_2^2` +shrinks `z` toward the latent mean, penalizing directions with small prior variance more +strongly than directions with large prior variance. Internally the scores are computed via +an augmented QR solve instead of explicitly forming the normal equations. + +# References + +1. G. H. Thomson, *The Factorial Analysis of Human Ability*, University of London Press, + 1939. +2. Penn State STAT 505, *12.12 - Estimation of Factor Scores*: + https://online.stat.psu.edu/stat505/lesson/12/12.12 +3. UCLA Statistical Methods and Data Analytics, *A Practical Introduction to Factor Analysis*: + https://stats.oarc.ucla.edu/spss/seminars/introduction-to-factor-analysis/ +""" struct SemRegressionScores <: SemScoresPredictMethod end + +raw""" + SemBartlettScores + +*Bartlett* latent scores. + +For centered observed data vector `y`, observed loading matrix `Λ`, and observed +residual covariance `Ψ`, the Bartlett scores are + +```math +\hat z_{\mathrm{Bartlett}} += \left(\Lambda^\top \Psi^{-1} \Lambda + \alpha I\right)^{-1} + \Lambda^\top \Psi^{-1} y, +``` + +which is equivalent to the weighted ridge least-squares problem + +```math +\hat z_{\mathrm{Bartlett}} += \arg\min_z \left(\lVert L_\Psi (y - \Lambda z) \rVert_2^2 ++ \alpha \lVert z \rVert_2^2 \right), +``` + +where `L_Ψ' L_Ψ = Ψ^{-1}`. Internally the score operator is computed from the +augmented QR solve instead of explicitly forming the normal equations. + +# References + +1. M. S. Bartlett, *The Statistical Conception of Mental Factors*, British Journal of + Psychology, 28(1), 97-104, 1937. +2. Penn State STAT 505, *12.12 - Estimation of Factor Scores*: + https://online.stat.psu.edu/stat505/lesson/12/12.12 +3. UCLA Statistical Methods and Data Analytics, *A Practical Introduction to Factor Analysis*: + https://stats.oarc.ucla.edu/spss/seminars/introduction-to-factor-analysis/ +""" struct SemBartlettScores <: SemScoresPredictMethod end + +raw""" + SemAndersonRubinScores + +*Anderson-Rubin* latent scores. + +Let + +```math +B_{\mathrm{Bartlett}} += \left(\Lambda^\top \Psi^{-1} \Lambda + \alpha I\right)^{-1} + \Lambda^\top \Psi^{-1} +``` + +be the Bartlett score operator and let `Σ` be the model-implied observed covariance. +Define the model-implied covariance of the Bartlett scores by + +```math +C_{\mathrm{Bartlett}} += B_{\mathrm{Bartlett}} \Sigma B_{\mathrm{Bartlett}}^\top. +``` + +The Anderson-Rubin operator is the standardized Bartlett operator + +```math +B_{\mathrm{AR}} = C_{\mathrm{Bartlett}}^{-1/2} B_{\mathrm{Bartlett}}, +``` + +so the resulting scores satisfy + +```math +\operatorname{Cov}_{\Sigma}(\hat z_{\mathrm{AR}}) = I. +``` + +Equivalently, Anderson-Rubin scores are obtained in two steps: + +```math +\widetilde{z} += \arg\min_z \left(\lVert L_\Psi (y - \Lambda z) \rVert_2^2 ++ \alpha \lVert z \rVert_2^2 \right), +``` + +followed by the whitening transform + +```math +\hat z_{\mathrm{AR}} = C_{\mathrm{Bartlett}}^{-1/2} \widetilde{z}. +``` + +!!! warning "Different latent basis" + *Anderson-Rubin* scores live in a whitened basis of the latent subspace, not + generally in the original latent-variable basis parameterized by ``Φ``. Unless the + whitening transform is diagonal or identity, each reported Anderson-Rubin score is + a linear combination of the model's latent variables. If scores on the original + latent-variable scale are needed, prefer [`SemRegressionScores`](@ref) or + [`SemBartlettScores`](@ref). 
+ +For correlated latent variables, these scores are standardized and orthogonalized, so they +do not preserve the original latent covariance ``Φ``; instead they return whitened Bartlett +coordinates with identity model-implied covariance. + +# References + +1. T. W. Anderson and H. Rubin, *Statistical Inference in Factor Analysis*, in + *Proceedings of the Third Berkeley Symposium on Mathematical Statistics and + Probability*, volume 5, 1956, pp. 111-150. +2. C. DiStefano, M. Zhu, and D. Mindrila, *Understanding and Using Factor Scores: + Considerations for the Applied Researcher*, Practical Assessment, Research and + Evaluation, 14(20), 2009. +3. UCLA Statistical Methods and Data Analytics, *A Practical Introduction to Factor Analysis*: + https://stats.oarc.ucla.edu/spss/seminars/introduction-to-factor-analysis/ +""" struct SemAndersonRubinScores <: SemScoresPredictMethod end function SemScoresPredictMethod(method::Symbol) @@ -134,6 +296,24 @@ function latent_scores_operator( return cov_lv * lv_FA' * ov_S⁻¹ end +""" + predict_latent_scores( + model::SemLoss, params, data = observed(model); + method = :regression, latent_vars = nothing, alpha = 0 + ) + +Predict latent scores for the selected latent variables from observed data. + +`method` selects the latent-score definition. See [`SemScoresPredictMethod`](@ref) and +its concrete implementations [`SemRegressionScores`](@ref), +[`SemBartlettScores`](@ref), and [`SemAndersonRubinScores`](@ref) for the mathematical +definitions and interpretation of each method. + +`latent_vars` selects which latent variables are scored. If `latent_vars = nothing`, all +latent variables in the model are scored. + +`alpha` is a non-negative ridge regularization parameter passed to the selected scoring method. +""" function predict_latent_scores( method::SemScoresPredictMethod, model::SemLoss, From 97d637dbdda7239581dc968d3aca8aabb7f328d7 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Mon, 11 May 2026 18:27:36 -0700 Subject: [PATCH 37/41] predict: refactor calculation use QR solver approach for numerical stability --- src/frontend/predict.jl | 239 +++++++++++++++++++++++++++------------- 1 file changed, 164 insertions(+), 75 deletions(-) diff --git a/src/frontend/predict.jl b/src/frontend/predict.jl index 2afaa52f..27f540f1 100644 --- a/src/frontend/predict.jl +++ b/src/frontend/predict.jl @@ -30,6 +30,76 @@ function predict( return res end +# internal helper for calculating latent scores +# a callable object that recieves centered data matrix +# and returns the corresponding latent scores +abstract type ScoresSolver end + +# solver that uses QR decomposition for numerical stability +struct QRScoresSolver{F, C} <: ScoresSolver + factorization::F + obs_chol::C + extra_rhs_rows::Int +end + +function QRScoresSolver( + loadings::AbstractMatrix, + obs_cov::AbstractMatrix; + prior_cov::Union{AbstractMatrix, Nothing} = nothing, + alpha::Number = 0, +) + nobs, nlat = size(loadings) + T = float( + promote_type( + eltype(loadings), + eltype(obs_cov), + isnothing(prior_cov) ? 
Float64 : eltype(prior_cov), + typeof(alpha), + ), + ) + + Λ = Matrix{T}(loadings) + Ψ_chol = cholesky(Symmetric(Matrix{T}(obs_cov))) + I_lat = Matrix{T}(I, nlat, nlat) + + aug_lhs = Matrix{T}[Matrix(Ψ_chol.U' \ Λ)] + extra_rhs_rows = 0 + + if !isnothing(prior_cov) + Φ_chol = cholesky(Symmetric(Matrix{T}(prior_cov))) + push!(aug_lhs, Matrix(Φ_chol.U' \ I_lat)) + extra_rhs_rows += nlat + end + + if alpha != 0 + push!(aug_lhs, sqrt(T(alpha)) * I_lat) + extra_rhs_rows += nlat + end + + return QRScoresSolver(qr(reduce(vcat, aug_lhs), ColumnNorm()), Ψ_chol, extra_rhs_rows) +end + +function (solver::QRScoresSolver)(data::AbstractMatrix) + T = float(promote_type(eltype(data), eltype(solver.obs_chol))) + whitened_t = Matrix{T}((Matrix{T}(data) / solver.obs_chol.U)') + rhs = if solver.extra_rhs_rows == 0 + whitened_t + else + vcat(whitened_t, zeros(T, solver.extra_rhs_rows, size(data, 1))) + end + return permutedims(solver.factorization \ rhs) +end + +struct WhitenedScoresSolver{S <: ScoresSolver, C} <: ScoresSolver + base_solver::S + score_cov_chol::C +end + +function (solver::WhitenedScoresSolver)(data::AbstractMatrix) + base_scores = solver.base_solver(data) + return base_scores / solver.score_cov_chol.U +end + """ SemScoresPredictMethod @@ -49,6 +119,20 @@ each method. """ abstract type SemScoresPredictMethod end +""" + latent_scores_solver( + method::SemScoresPredictMethod, + implied::SemImplied, + latent_vars::AbstractVector, + A::AbstractMatrix, S::AbstractMatrix, + lv_I_A⁻¹::Union{AbstractMatrix, Nothing} = nothing; + alpha::Number = 0 + ) -> ScoresSolver + +Create a solver for the latent scores based on the specified prediction method. +""" +latent_scores_solver + raw""" SemRegressionScores @@ -90,6 +174,37 @@ an augmented QR solve instead of explicitly forming the normal equations. """ struct SemRegressionScores <: SemScoresPredictMethod end +function QRScoresSolver( + implied::SemImplied, + latent_vars::AbstractVector, + A::AbstractMatrix, + S::AbstractMatrix, + lv_I_A⁻¹::Union{AbstractMatrix, Nothing} = nothing; + alpha::Number = 0, +) + ram = implied.ram_matrices + lv_FA = Matrix(ram.F * A[:, latent_vars]) + if isnothing(lv_I_A⁻¹) + I_A = Matrix{eltype(A)}(I, size(A, 1), size(A, 2)) - A + lv_I_A⁻¹ = inv_matrix_rows(I_A, latent_vars) + end + cov_lv = X_A_Xt(S, lv_I_A⁻¹) + + obs_inds = observed_var_indices(ram) + obs_cov = Matrix(S[obs_inds, obs_inds]) + return QRScoresSolver(lv_FA, obs_cov; prior_cov = cov_lv, alpha) +end + +latent_scores_solver( + ::SemRegressionScores, + implied::SemImplied, + latent_vars::AbstractVector, + A::AbstractMatrix, + S::AbstractMatrix, + lv_I_A⁻¹::Union{AbstractMatrix, Nothing} = nothing; + alpha::Number = 0, +) = QRScoresSolver(implied, latent_vars, A, S, lv_I_A⁻¹; alpha) + raw""" SemBartlettScores @@ -126,6 +241,23 @@ augmented QR solve instead of explicitly forming the normal equations. """ struct SemBartlettScores <: SemScoresPredictMethod end +function latent_scores_solver( + ::SemBartlettScores, + implied::SemImplied, + latent_vars::AbstractVector, + A::AbstractMatrix, + S::AbstractMatrix, + lv_I_A⁻¹::Union{AbstractMatrix, Nothing} = nothing; + alpha::Number = 0, +) + ram = implied.ram_matrices + lv_FA = Matrix(ram.F * A[:, latent_vars]) + + obs_inds = observed_var_indices(ram) + obs_cov = Matrix(S[obs_inds, obs_inds]) + return QRScoresSolver(lv_FA, obs_cov; alpha) +end + raw""" SemAndersonRubinScores @@ -198,6 +330,24 @@ coordinates with identity model-implied covariance. 
""" struct SemAndersonRubinScores <: SemScoresPredictMethod end +function latent_scores_solver( + ::SemAndersonRubinScores, + implied::SemImplied, + latent_vars::AbstractVector, + A::AbstractMatrix, + S::AbstractMatrix, + lv_I_A⁻¹::Union{AbstractMatrix, Nothing} = nothing; + alpha::Number = 0, +) + nobs = nobserved_vars(implied) + + base_solver = QRScoresSolver(SemBartlettScores(), implied, latent_vars, A, S; alpha) + base_op = permutedims(base_solver(Matrix{eltype(S)}(I, nobs, nobs))) + + score_cov = Symmetric(X_A_Xt(implied.Σ, base_op)) + return WhitenedScoresSolver(base_solver, cholesky!(score_cov)) +end + function SemScoresPredictMethod(method::Symbol) if method == :regression return SemRegressionScores() @@ -222,78 +372,15 @@ predict_latent_scores( data::SemObserved = observed(sem_term(fit.model)), ) = predict_latent_scores(method, loss(sem_term(fit.model)), fit.solution, data) -function inv_cov!(A::AbstractMatrix) - if istril(A) - A = LowerTriangular(A) - elseif istriu(A) - A = UpperTriangular(A) - else - end - A_chol = Cholesky(A) - return inv!(A_chol) -end - -# wrapper that materializes A and S matrices from the model params -function latent_scores_operator( - ::Type{T}, - model::SemLoss, - latent_vars::AbstractVector, - params::AbstractVector; - kwargs..., -) where {T <: SemScoresPredictMethod} - ram = SEM.implied(model).ram_matrices - latent_scores_operator( - T, - SEM.implied(model), - latent_vars, - materialize(ram.A, params), - materialize(ram.S, params); - kwargs..., - ) -end - -function latent_scores_operator( - ::SemRegressionScores, - implied::SemImplied, - latent_vars::AbstractVector, - A::AbstractMatrix, - S::AbstractMatrix; - alpha::Number = 0, -) - implied = SEM.implied(model) - ram = implied.ram_matrices - - lv_FA = ram.F * A[:, latent_vars] - - cov_lv = if alpha == 0 - lv_I_A⁻¹ = inv(I - A)[latent_vars, :] - X_A_Xt(S, lv_I_A⁻¹) - else - inv(Xt_A_X(inv(S), I - A) + alpha * I)[latent_vars, latent_vars] +# return the rows of inv(A) for the specified row indices avoiding full inv(A) calculation +function inv_matrix_rows(A::AbstractMatrix, row_inds::AbstractVector{<:Integer}) + n = size(A, 1) + n == size(A, 2) || throw(DimensionMismatch("A must be square.")) + rhs = zeros(eltype(A), n, length(row_inds)) + for (j, row_ix) in enumerate(row_inds) + rhs[row_ix, j] = one(eltype(A)) end - Σ = implied.Σ - Σ⁻¹ = inv(Σ) - return cov_lv * lv_FA' * Σ⁻¹ -end - -function latent_scores_operator( - ::SemBartlettScores, - implied::SemImplied, - latent_vars::AbstractVector, - A::AbstractMatrix, - S::AbstractMatrix; - alpha::Number = 0, -) - ram = implied.ram_matrices - lv_FA = ram.F * A[:, latent_vars] - - obs_inds = observed_var_indices(ram) - ov_S⁻¹ = inv(S[obs_inds, obs_inds]) - cov_lv⁻¹ = Xt_A_X(ov_S⁻¹, lv_FA) - (alpha != 0) && (cov_lv⁻¹ += alpha * I) - cov_lv = inv(cov_lv⁻¹) - - return cov_lv * lv_FA' * ov_S⁻¹ + return copy((A' \ rhs)') end """ @@ -343,15 +430,17 @@ function predict_latent_scores( A = materialize(ram.A, params) S = materialize(ram.S, params) - lv_scores_op = latent_scores_operator(method, implied, lv_inds, A, S; alpha) + I_A = Matrix{eltype(A)}(I, size(A, 1), size(A, 2)) - A + lv_I_A⁻¹ = inv_matrix_rows(I_A, lv_inds) + lv_scores_solver = latent_scores_solver(method, implied, lv_inds, A, S, lv_I_A⁻¹; alpha) - data = + centered_data = data.data .- (isnothing(data.obs_mean) ? 
mean(data.data, dims = 1) : data.obs_mean') - lv_scores = data * lv_scores_op' + lv_scores = lv_scores_solver(centered_data) + # adjust the scores w.r.t the variable means if MeanStruct(implied) === HasMeanStruct M = materialize(ram.M, params) - lv_I_A⁻¹ = inv(I - A)[lv_inds, :] lv_scores .+= (lv_I_A⁻¹ * M)' end From 91f0ee8ef4e2fd42202e27cdb87b548236a2aabe Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Mon, 11 May 2026 18:27:36 -0700 Subject: [PATCH 38/41] predict: add unit tests --- test/unit_tests/predict_scores.jl | 186 ++++++++++++++++++++++++++++++ test/unit_tests/unit_tests.jl | 1 + 2 files changed, 187 insertions(+) create mode 100644 test/unit_tests/predict_scores.jl diff --git a/test/unit_tests/predict_scores.jl b/test/unit_tests/predict_scores.jl new file mode 100644 index 00000000..6c152460 --- /dev/null +++ b/test/unit_tests/predict_scores.jl @@ -0,0 +1,186 @@ +using StructuralEquationModels, Test, LinearAlgebra, Statistics + +SEM = StructuralEquationModels + +function inverse_rows(A::AbstractMatrix, row_inds::AbstractVector{<:Integer}) + n = size(A, 1) + n == size(A, 2) || throw(DimensionMismatch("A must be square.")) + rhs = zeros(eltype(A), n, length(row_inds)) + for (j, row_ix) in pairs(row_inds) + rhs[row_ix, j] = one(eltype(A)) + end + return copy((A' \ rhs)') +end + +function bartlett_operator(Λ::AbstractMatrix, Ψ::AbstractMatrix; alpha::Real = 0.0) + nobs, nlat = size(Λ) + T = float(promote_type(eltype(Λ), eltype(Ψ), typeof(alpha))) + Ψ_chol = cholesky(Symmetric(Matrix{T}(Ψ))) + I_obs = Matrix{T}(I, nobs, nobs) + I_lat = Matrix{T}(I, nlat, nlat) + Ψ⁻¹Λ = Ψ_chol \ Matrix{T}(Λ) + Ψ⁻¹ = Ψ_chol \ I_obs + return (Λ' * Ψ⁻¹Λ + T(alpha) * I_lat) \ (Λ' * Ψ⁻¹) +end + +function regression_operator( + Λ::AbstractMatrix, + Ψ::AbstractMatrix, + Φ::AbstractMatrix; + alpha::Real = 0.0, +) + nobs, nlat = size(Λ) + T = float(promote_type(eltype(Λ), eltype(Ψ), eltype(Φ), typeof(alpha))) + Ψ_chol = cholesky(Symmetric(Matrix{T}(Ψ))) + Φ_chol = cholesky(Symmetric(Matrix{T}(Φ))) + I_obs = Matrix{T}(I, nobs, nobs) + I_lat = Matrix{T}(I, nlat, nlat) + Ψ⁻¹Λ = Ψ_chol \ Matrix{T}(Λ) + Ψ⁻¹ = Ψ_chol \ I_obs + Φ⁻¹ = Φ_chol \ I_lat + return (Λ' * Ψ⁻¹Λ + Φ⁻¹ + T(alpha) * I_lat) \ (Λ' * Ψ⁻¹) +end + +function anderson_rubin_operator( + Λ::AbstractMatrix, + Ψ::AbstractMatrix, + Σ::AbstractMatrix; + alpha::Real = 0.0, +) + B_bartlett = bartlett_operator(Λ, Ψ; alpha) + C_bartlett = Symmetric(B_bartlett * Σ * B_bartlett') + C_chol = cholesky(C_bartlett) + return C_chol.U' \ B_bartlett +end + +@testset "predict_latent_scores formulas" begin + A = [ + 0 0 0 0 1.0 0; + 0 0 0 0 :lambda21 0; + 0 0 0 0 0 1.0; + 0 0 0 0 0 :lambda42; + 0 0 0 0 0 0; + 0 0 0 0 0 0 + ] + S = [ + :psi1 0 0 0 0 0; + 0 :psi2 0 0 0 0; + 0 0 :psi3 0 0 0; + 0 0 0 :psi4 0 0; + 0 0 0 0 :phi11 :phi12; + 0 0 0 0 :phi12 :phi22 + ] + F = [ + 1.0 0 0 0 0 0; + 0 1 0 0 0 0; + 0 0 1 0 0 0; + 0 0 0 1 0 0 + ] + params_syms = [:lambda21, :lambda42, :psi1, :psi2, :psi3, :psi4, :phi11, :phi12, :phi22] + spec = RAMMatrices(; + A, + S, + F, + params = params_syms, + colnames = [:y1, :y2, :y3, :y4, :eta1, :eta2], + ) + + obs_colnames = [:y1, :y2, :y3, :y4] + model = SemML(SemObservedData(randn(24, 4), obs_colnames = obs_colnames), RAM(spec)) + data_values = [ + 1.1 0.3 -0.4 0.8; + 0.7 -0.2 0.2 1.1; + -0.3 -0.7 0.5 0.1; + 0.5 0.9 -0.1 -0.4 + ] + data = SemObservedData(data_values, obs_colnames = obs_colnames) + params = [0.8, 0.9, 0.3, 0.4, 0.35, 0.45, 1.0, 0.2, 1.3] + + implied = SEM.imply(model) + SEM.update!(SEM.EvaluationTargets(0.0, nothing, 
nothing), implied, params) + + ram = implied.ram_matrices + lv_vars = [:eta1, :eta2] + lv_inds = SEM.check_var_indices(ram, lv_vars, allow_observed = false, normalize = true) + obs_inds = SEM.observed_var_indices(ram) + + A_mtx = SEM.materialize(ram.A, params) + S_mtx = SEM.materialize(ram.S, params) + centered_data = data.data .- mean(data.data, dims = 1) + Λ = Matrix(ram.F * A_mtx[:, lv_inds]) + Ψ = Matrix(S_mtx[obs_inds, obs_inds]) + I_A = Matrix{Float64}(I, size(A_mtx, 1), size(A_mtx, 2)) - A_mtx + lv_I_A⁻¹ = inverse_rows(I_A, lv_inds) + Φ = SEM.X_A_Xt(S_mtx, lv_I_A⁻¹) + Σ = Matrix(implied.Σ) + + bartlett_alpha = 0.15 + bartlett_scores = SEM.predict_latent_scores( + model, + params, + data; + method = :Bartlett, + latent_vars = lv_vars, + alpha = bartlett_alpha, + ) + bartlett_op = bartlett_operator(Λ, Ψ; alpha = bartlett_alpha) + @test bartlett_scores ≈ centered_data * bartlett_op' rtol = 1e-10 atol = 1e-10 + + regression_alpha = 0.2 + regression_scores = SEM.predict_latent_scores( + model, + params, + data; + method = :regression, + latent_vars = lv_vars, + alpha = regression_alpha, + ) + regression_op = regression_operator(Λ, Ψ, Φ; alpha = regression_alpha) + @test regression_scores ≈ centered_data * regression_op' rtol = 1e-10 atol = 1e-10 + + regression_scores_0 = SEM.predict_latent_scores( + model, + params, + data; + method = :regression, + latent_vars = lv_vars, + alpha = 0.0, + ) + Σ_chol = cholesky(Symmetric(Σ)) + regression_op_0 = Φ * Λ' * (Σ_chol \ Matrix{Float64}(I, size(Σ, 1), size(Σ, 2))) + @test regression_scores_0 ≈ centered_data * regression_op_0' rtol = 1e-10 atol = 1e-10 + + bartlett_scores_0 = SEM.predict_latent_scores( + model, + params, + data; + method = :Bartlett, + latent_vars = lv_vars, + alpha = 0.0, + ) + @test !isapprox(regression_scores_0, bartlett_scores_0; rtol = 1e-6, atol = 1e-6) + + ar_alpha = 0.15 + ar_scores = SEM.predict_latent_scores( + model, + params, + data; + method = :AndersonRubin, + latent_vars = lv_vars, + alpha = ar_alpha, + ) + ar_op = anderson_rubin_operator(Λ, Ψ, Σ; alpha = ar_alpha) + @test ar_scores ≈ centered_data * ar_op' rtol = 1e-10 atol = 1e-10 + @test ar_op * Σ * ar_op' ≈ Matrix{Float64}(I, size(ar_op, 1), size(ar_op, 1)) rtol = + 1e-10 atol = 1e-10 + + ar_scores_0 = SEM.predict_latent_scores( + model, + params, + data; + method = :AndersonRubin, + latent_vars = lv_vars, + alpha = 0.0, + ) + @test !isapprox(ar_scores_0, bartlett_scores_0; rtol = 1e-6, atol = 1e-6) +end diff --git a/test/unit_tests/unit_tests.jl b/test/unit_tests/unit_tests.jl index 4d7dad7c..466e7443 100644 --- a/test/unit_tests/unit_tests.jl +++ b/test/unit_tests/unit_tests.jl @@ -8,6 +8,7 @@ available_tests = Dict( "specification" => "SemSpecification", "model" => "Sem model", "StatsAPI" => "StatsAPI", + "predict_scores" => "Predict latent variables scores", ) # Determine which tests to run based on command-line arguments From 83f08d4008f3111ae3b12aa84b564fc0f976d58a Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Mon, 11 May 2026 18:27:36 -0700 Subject: [PATCH 39/41] predict(): add prior_cov_alpha kwarg pr_cov_alpha = 0 - Bartlett pr_cov_alpha = 1 - regression Co-authored-by: Copilot --- src/frontend/predict.jl | 160 ++++++++++++++++++++++++------ test/unit_tests/predict_scores.jl | 68 ++++++++++++- 2 files changed, 193 insertions(+), 35 deletions(-) diff --git a/src/frontend/predict.jl b/src/frontend/predict.jl index 27f540f1..743df87c 100644 --- a/src/frontend/predict.jl +++ b/src/frontend/predict.jl @@ -46,14 +46,22 @@ function QRScoresSolver( 
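+    # roles of the arguments, as used throughout this file: `loadings` is the
+    # nobs × nlat loading matrix Λ, `obs_cov` the residual covariance Ψ,
+    # `prior_cov` the latent prior covariance Φ, and `alpha` the ridge penalty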
loadings::AbstractMatrix, obs_cov::AbstractMatrix; prior_cov::Union{AbstractMatrix, Nothing} = nothing, + prior_cov_alpha::Number = 1, alpha::Number = 0, ) - nobs, nlat = size(loadings) + alpha >= 0 || + throw(ArgumentError("The regularization parameter alpha must be non-negative")) + prior_cov_alpha >= 0 || throw( + ArgumentError("The regularization parameter prior_cov_alpha must be non-negative"), + ) + + _, nlat = size(loadings) T = float( promote_type( eltype(loadings), eltype(obs_cov), isnothing(prior_cov) ? Float64 : eltype(prior_cov), + typeof(prior_cov_alpha), typeof(alpha), ), ) @@ -65,9 +73,9 @@ function QRScoresSolver( aug_lhs = Matrix{T}[Matrix(Ψ_chol.U' \ Λ)] extra_rhs_rows = 0 - if !isnothing(prior_cov) + if !isnothing(prior_cov) && prior_cov_alpha != 0 Φ_chol = cholesky(Symmetric(Matrix{T}(prior_cov))) - push!(aug_lhs, Matrix(Φ_chol.U' \ I_lat)) + push!(aug_lhs, sqrt(T(prior_cov_alpha)) * Matrix(Φ_chol.U' \ I_lat)) extra_rhs_rows += nlat end @@ -76,7 +84,8 @@ function QRScoresSolver( extra_rhs_rows += nlat end - return QRScoresSolver(qr(reduce(vcat, aug_lhs), ColumnNorm()), Ψ_chol, extra_rhs_rows) + lhs_qr = qr(reduce(vcat, aug_lhs), ColumnNorm()) + return QRScoresSolver(lhs_qr, Ψ_chol, extra_rhs_rows) end function (solver::QRScoresSolver)(data::AbstractMatrix) @@ -126,7 +135,8 @@ abstract type SemScoresPredictMethod end latent_vars::AbstractVector, A::AbstractMatrix, S::AbstractMatrix, lv_I_A⁻¹::Union{AbstractMatrix, Nothing} = nothing; - alpha::Number = 0 + alpha::Number = 0, + prior_cov_alpha::Number = 1, ) -> ScoresSolver Create a solver for the latent scores based on the specified prediction method. @@ -154,14 +164,15 @@ which is equivalent to the penalized least-squares problem ```math \hat z_{\mathrm{reg}} = \arg\min_z \left(\lVert L_\Psi (y - \Lambda z) \rVert_2^2 -+ \lVert L_\Phi z \rVert_2^2 ++ \lambda_\Phi \lVert L_\Phi z \rVert_2^2 + \alpha \lVert z \rVert_2^2 \right), ``` -where `L_Ψ' L_Ψ = Ψ^{-1}` and `L_Φ' L_Φ = Φ^{-1}`. The prior term `\lVert L_\Phi z \rVert_2^2` -shrinks `z` toward the latent mean, penalizing directions with small prior variance more -strongly than directions with large prior variance. Internally the scores are computed via -an augmented QR solve instead of explicitly forming the normal equations. +where `L_Ψ' L_Ψ = Ψ^{-1}` and `L_Φ' L_Φ = Φ^{-1}`. The classical regression/Thomson +scores correspond to `prior_cov_alpha = λ_Φ = 1`. Setting `prior_cov_alpha = 0` +switches off the latent-covariance prior and recovers the Bartlett objective with the +same `alpha`. Internally the scores are computed via an augmented QR solve instead of +explicitly forming the normal equations. 
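+
+For reference (not how the solver is implemented), the minimizer of this objective has
+the closed form
+
+```math
+\hat z_{\mathrm{reg}} = \left( \Lambda' \Psi^{-1} \Lambda + \lambda_\Phi \Phi^{-1}
++ \alpha I \right)^{-1} \Lambda' \Psi^{-1} y ,
+```
+
+which the augmented QR factorization solves without ever forming these normal equations.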
# References @@ -181,18 +192,30 @@ function QRScoresSolver( S::AbstractMatrix, lv_I_A⁻¹::Union{AbstractMatrix, Nothing} = nothing; alpha::Number = 0, + prior_cov_alpha::Number = 0, ) ram = implied.ram_matrices lv_FA = Matrix(ram.F * A[:, latent_vars]) - if isnothing(lv_I_A⁻¹) - I_A = Matrix{eltype(A)}(I, size(A, 1), size(A, 2)) - A - lv_I_A⁻¹ = inv_matrix_rows(I_A, latent_vars) + + prior_cov = if prior_cov_alpha != 0 + if isnothing(lv_I_A⁻¹) + I_A = Matrix{eltype(A)}(I, size(A, 1), size(A, 2)) - A + lv_I_A⁻¹ = inv_matrix_rows(I_A, latent_vars) + end + # postpone scaling by prior_cov_alpha until the Cholesky factor + SEM.trunc_eigvals( + Symmetric(X_A_Xt(S, lv_I_A⁻¹)), + 1e-6, + mtx_label = "prior_cov", + verbose = false, + ) + else + nothing end - cov_lv = X_A_Xt(S, lv_I_A⁻¹) obs_inds = observed_var_indices(ram) obs_cov = Matrix(S[obs_inds, obs_inds]) - return QRScoresSolver(lv_FA, obs_cov; prior_cov = cov_lv, alpha) + return QRScoresSolver(lv_FA, obs_cov; prior_cov, prior_cov_alpha, alpha) end latent_scores_solver( @@ -203,7 +226,16 @@ latent_scores_solver( S::AbstractMatrix, lv_I_A⁻¹::Union{AbstractMatrix, Nothing} = nothing; alpha::Number = 0, -) = QRScoresSolver(implied, latent_vars, A, S, lv_I_A⁻¹; alpha) + prior_cov_alpha::Union{Number, Nothing} = nothing, +) = QRScoresSolver( + implied, + latent_vars, + A, + S, + lv_I_A⁻¹; + alpha, + prior_cov_alpha = something(prior_cov_alpha, 1), +) raw""" SemBartlettScores @@ -227,8 +259,9 @@ which is equivalent to the weighted ridge least-squares problem + \alpha \lVert z \rVert_2^2 \right), ``` -where `L_Ψ' L_Ψ = Ψ^{-1}`. Internally the score operator is computed from the -augmented QR solve instead of explicitly forming the normal equations. +where `L_Ψ' L_Ψ = Ψ^{-1}`. Equivalently, this is the regression-score objective with +`prior_cov_alpha = 0`, i.e. with the latent-covariance prior switched off. Internally +the score operator is computed via the same augmented QR solve as the regression case. # References @@ -241,7 +274,7 @@ augmented QR solve instead of explicitly forming the normal equations. 
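+A hedged equivalence check (assuming `fit` is a fitted `SemFit`): Bartlett scores should
+match regression scores with the latent-covariance prior switched off,
+
+```julia
+b = predict_latent_scores(fit; method = :Bartlett)
+r = predict_latent_scores(fit; method = :regression, prior_cov_alpha = 0)
+b ≈ r  # expected to hold up to numerical tolerance
+```
+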
""" struct SemBartlettScores <: SemScoresPredictMethod end -function latent_scores_solver( +latent_scores_solver( ::SemBartlettScores, implied::SemImplied, latent_vars::AbstractVector, @@ -249,14 +282,16 @@ function latent_scores_solver( S::AbstractMatrix, lv_I_A⁻¹::Union{AbstractMatrix, Nothing} = nothing; alpha::Number = 0, + prior_cov_alpha::Nothing = nothing, +) = latent_scores_solver_qr( + implied, + latent_vars, + A, + S, + lv_I_A⁻¹; + alpha, + prior_cov_alpha = 0, ) - ram = implied.ram_matrices - lv_FA = Matrix(ram.F * A[:, latent_vars]) - - obs_inds = observed_var_indices(ram) - obs_cov = Matrix(S[obs_inds, obs_inds]) - return QRScoresSolver(lv_FA, obs_cov; alpha) -end raw""" SemAndersonRubinScores @@ -338,10 +373,19 @@ function latent_scores_solver( S::AbstractMatrix, lv_I_A⁻¹::Union{AbstractMatrix, Nothing} = nothing; alpha::Number = 0, + prior_cov_alpha::Nothing = nothing, ) nobs = nobserved_vars(implied) - base_solver = QRScoresSolver(SemBartlettScores(), implied, latent_vars, A, S; alpha) + base_solver = QRScoresSolver( + SemBartlettScores(), + implied, + latent_vars, + A, + S; + alpha, + prior_cov_alpha = 0, + ) base_op = permutedims(base_solver(Matrix{eltype(S)}(I, nobs, nobs))) score_cov = Symmetric(X_A_Xt(implied.Σ, base_op)) @@ -364,13 +408,34 @@ predict_latent_scores( fit::SemFit, data::SemObserved = observed(sem_term(fit.model)); method::Symbol = :regression, -) = predict_latent_scores(SemScoresPredictMethod(method), fit, data) + latent_vars::Union{AbstractVector, Nothing} = nothing, + alpha::Number = 0, + prior_cov_alpha::Union{Number, Nothing} = nothing, +) = predict_latent_scores( + SemScoresPredictMethod(method), + fit, + data; + latent_vars, + alpha, + prior_cov_alpha, +) predict_latent_scores( method::SemScoresPredictMethod, fit::SemFit, - data::SemObserved = observed(sem_term(fit.model)), -) = predict_latent_scores(method, loss(sem_term(fit.model)), fit.solution, data) + data::SemObserved = observed(sem_term(fit.model)); + latent_vars::Union{AbstractVector, Nothing} = nothing, + alpha::Number = 0, + prior_cov_alpha::Union{Number, Nothing} = nothing, +) = predict_latent_scores( + method, + loss(sem_term(fit.model)), + fit.solution, + data; + latent_vars, + alpha, + prior_cov_alpha, +) # return the rows of inv(A) for the specified row indices avoiding full inv(A) calculation function inv_matrix_rows(A::AbstractMatrix, row_inds::AbstractVector{<:Integer}) @@ -386,7 +451,8 @@ end """ predict_latent_scores( model::SemLoss, params, data = observed(model); - method = :regression, latent_vars = nothing, alpha = 0 + method = :regression, latent_vars = nothing, + alpha = 0, prior_cov_alpha = nothing ) Predict latent scores for the selected latent variables from observed data. @@ -400,6 +466,10 @@ definitions and interpretation of each method. latent variables in the model are scored. `alpha` is a non-negative ridge regularization parameter passed to the selected scoring method. +`prior_cov_alpha` is the non-negative weight of the latent-covariance prior used by +[`SemRegressionScores`](@ref). The default `prior_cov_alpha = 1` gives the classical +regression/Thomson scores, while `prior_cov_alpha = 0` reduces the regression objective +to the Bartlett objective with the same `alpha`. 
""" function predict_latent_scores( method::SemScoresPredictMethod, @@ -408,6 +478,7 @@ function predict_latent_scores( data::SemObserved = observed(model); latent_vars::Union{AbstractVector, Nothing} = nothing, alpha::Number = 0, + prior_cov_alpha::Union{Number, Nothing} = nothing, ) nobserved_vars(data) == nobserved_vars(model) || throw( DimensionMismatch( @@ -419,8 +490,20 @@ function predict_latent_scores( "The length of parameters vector ($(length(params))) does not match the number of parameters in the model ($(nparams(model)))", ), ) - (alpha < 0) && + (alpha >= 0) || throw(ArgumentError("The regularization parameter alpha must be non-negative")) + if method isa Union{SemBartlettScores, SemAndersonRubinScores} + !isnothing(prior_cov_alpha) + @warn "prior_cov_alpha is only supported for regression scores, ignored for $(typeof(method))" + prior_cov_alpha = nothing + end + isnothing(prior_cov_alpha) || + prior_cov_alpha >= 0 || + throw( + ArgumentError( + "The regularization parameter prior_cov_alpha must be non-negative", + ), + ) implied = SEM.implied(model) ram = implied.ram_matrices @@ -432,7 +515,16 @@ function predict_latent_scores( S = materialize(ram.S, params) I_A = Matrix{eltype(A)}(I, size(A, 1), size(A, 2)) - A lv_I_A⁻¹ = inv_matrix_rows(I_A, lv_inds) - lv_scores_solver = latent_scores_solver(method, implied, lv_inds, A, S, lv_I_A⁻¹; alpha) + lv_scores_solver = latent_scores_solver( + method, + implied, + lv_inds, + A, + S, + lv_I_A⁻¹; + alpha, + prior_cov_alpha, + ) centered_data = data.data .- (isnothing(data.obs_mean) ? mean(data.data, dims = 1) : data.obs_mean') @@ -454,6 +546,7 @@ predict_latent_scores( method::Symbol = :regression, latent_vars::Union{AbstractVector, Nothing} = nothing, alpha::Number = 0, + prior_cov_alpha::Union{Number, Nothing} = nothing, ) = predict_latent_scores( SemScoresPredictMethod(method), model, @@ -461,4 +554,5 @@ predict_latent_scores( data; latent_vars, alpha, + prior_cov_alpha, ) diff --git a/test/unit_tests/predict_scores.jl b/test/unit_tests/predict_scores.jl index 6c152460..47c3c5b6 100644 --- a/test/unit_tests/predict_scores.jl +++ b/test/unit_tests/predict_scores.jl @@ -28,9 +28,18 @@ function regression_operator( Ψ::AbstractMatrix, Φ::AbstractMatrix; alpha::Real = 0.0, + prior_cov_alpha::Real = 1.0, ) nobs, nlat = size(Λ) - T = float(promote_type(eltype(Λ), eltype(Ψ), eltype(Φ), typeof(alpha))) + T = float( + promote_type( + eltype(Λ), + eltype(Ψ), + eltype(Φ), + typeof(alpha), + typeof(prior_cov_alpha), + ), + ) Ψ_chol = cholesky(Symmetric(Matrix{T}(Ψ))) Φ_chol = cholesky(Symmetric(Matrix{T}(Φ))) I_obs = Matrix{T}(I, nobs, nobs) @@ -38,7 +47,7 @@ function regression_operator( Ψ⁻¹Λ = Ψ_chol \ Matrix{T}(Λ) Ψ⁻¹ = Ψ_chol \ I_obs Φ⁻¹ = Φ_chol \ I_lat - return (Λ' * Ψ⁻¹Λ + Φ⁻¹ + T(alpha) * I_lat) \ (Λ' * Ψ⁻¹) + return (Λ' * Ψ⁻¹Λ + T(prior_cov_alpha) * Φ⁻¹ + T(alpha) * I_lat) \ (Λ' * Ψ⁻¹) end function anderson_rubin_operator( @@ -138,6 +147,26 @@ end regression_op = regression_operator(Λ, Ψ, Φ; alpha = regression_alpha) @test regression_scores ≈ centered_data * regression_op' rtol = 1e-10 atol = 1e-10 + regression_prior_cov_alpha = 0.35 + regression_scores_tuned = SEM.predict_latent_scores( + model, + params, + data; + method = :regression, + latent_vars = lv_vars, + alpha = regression_alpha, + prior_cov_alpha = regression_prior_cov_alpha, + ) + regression_op_tuned = regression_operator( + Λ, + Ψ, + Φ; + alpha = regression_alpha, + prior_cov_alpha = regression_prior_cov_alpha, + ) + @test regression_scores_tuned ≈ centered_data * 
regression_op_tuned' rtol = 1e-10 atol = + 1e-10 + regression_scores_0 = SEM.predict_latent_scores( model, params, @@ -158,8 +187,43 @@ end latent_vars = lv_vars, alpha = 0.0, ) + @test_throws ArgumentError SEM.predict_latent_scores( + model, + params, + data; + method = :Bartlett, + latent_vars = lv_vars, + prior_cov_alpha = 0.1, + ) + @test_throws ArgumentError SEM.predict_latent_scores( + model, + params, + data; + method = :AndersonRubin, + latent_vars = lv_vars, + prior_cov_alpha = 0.1, + ) + regression_scores_bartlett = SEM.predict_latent_scores( + model, + params, + data; + method = :regression, + latent_vars = lv_vars, + alpha = 0.0, + prior_cov_alpha = 0.0, + ) + @test regression_scores_bartlett ≈ bartlett_scores_0 rtol = 1e-10 atol = 1e-10 @test !isapprox(regression_scores_0, bartlett_scores_0; rtol = 1e-6, atol = 1e-6) + @test_throws ArgumentError SEM.predict_latent_scores( + model, + params, + data; + method = :regression, + latent_vars = lv_vars, + prior_cov_alpha = -0.1, + ) + ar_alpha = 0.15 ar_scores = SEM.predict_latent_scores( model, From adb31a5509a6929cac5d7f8856fa7ab7db6a76f9 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Mon, 11 May 2026 18:27:36 -0700 Subject: [PATCH 40/41] SemVariablesTransform --- src/frontend/predict.jl | 52 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/src/frontend/predict.jl b/src/frontend/predict.jl index 743df87c..8cc800cf 100644 --- a/src/frontend/predict.jl +++ b/src/frontend/predict.jl @@ -109,6 +109,58 @@ function (solver::WhitenedScoresSolver)(data::AbstractMatrix) return base_scores / solver.score_cov_chol.U end +const BasisTransformOperator = Union{AbstractMatrix, UniformScaling} + +""" + SemVariablesTransform + +Linear change-of-basis metadata for centered latent scores. + +`old_to_new` maps centered variables from the original basis to the transformed basis. +`new_to_old` applies the inverse map. + +For [`SemAndersonRubinScores`](@ref), the method basis is the whitened Anderson-Rubin +coordinate system. For [`SemRegressionScores`](@ref) and [`SemBartlettScores`](@ref), the +transform is the identity. +""" +struct SemVariablesTransform{TN <: BasisTransformOperator, TO <: BasisTransformOperator} + vars::Vector{Symbol} + old_to_new::TN + new_to_old::TO +end + +""" + (transform::SemVariablesTransform)(scores; inverse = false) + +Apply a centered latent-score basis transform. + +When `inverse = false` (default), the rows of `scores` are interpreted in the model's +original latent-variable basis and mapped to the method basis stored in `transform`. +When `inverse = true`, the rows are interpreted in the method basis and mapped back to +the original latent-variable basis. + +The transform acts on centered scores. For uncentered scores, subtract the corresponding +latent means before applying the transform and add the target-basis means after. +""" +function (transform::SemVariablesTransform)(scores::AbstractMatrix; inverse::Bool = false) + size(scores, 2) == length(transform.vars) || throw( + DimensionMismatch( + "Number of score columns ($(size(scores, 2))) does not match transform size ($(length(transform.vars))).", + ), + ) + basis_mtx = inverse ? 
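+    # when `inverse` is set, map method-basis scores back with `new_to_old`;
+    # otherwise map original-basis scores forward with `old_to_new`: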
transform.new_to_old : transform.old_to_new + return scores * basis_mtx +end + +function (transform::SemVariablesTransform)(scores::AbstractVector; inverse::Bool = false) + length(scores) == length(transform.vars) || throw( + DimensionMismatch( + "Score vector length ($(length(scores))) does not match transform size ($(length(transform.vars))).", + ), + ) + return vec(transform(reshape(collect(scores), 1, :); inverse)) +end + """ SemScoresPredictMethod From 6266801182ae5fe4ba043b8d395321563c9a587b Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Mon, 11 May 2026 18:27:36 -0700 Subject: [PATCH 41/41] score_basis_transform() --- src/frontend/predict.jl | 93 +++++++++++++++++++++++++++++++ test/unit_tests/predict_scores.jl | 25 +++++++++ 2 files changed, 118 insertions(+) diff --git a/src/frontend/predict.jl b/src/frontend/predict.jl index 8cc800cf..aef61840 100644 --- a/src/frontend/predict.jl +++ b/src/frontend/predict.jl @@ -456,6 +456,99 @@ function SemScoresPredictMethod(method::Symbol) end end +""" + score_basis_transform(model::SemLoss, params; method = :regression, + latent_vars = nothing, alpha = 0, + prior_cov_alpha = nothing) + +Return the centered latent-score basis transform associated with a score-prediction +method. + +For [`SemRegressionScores`](@ref) and [`SemBartlettScores`](@ref), the result is the +identity transform. For [`SemAndersonRubinScores`](@ref), the result stores the whitening +map between the model's original latent-variable basis and the Anderson-Rubin basis. + +The returned object can be used to map centered Anderson-Rubin scores back to the +original latent-variable basis via: + +```julia +orig_scores = transform(ar_scores; inverse = true) +``` + +`alpha` matters only for [`SemAndersonRubinScores`](@ref), because the whitening +transform is built from the ridge-regularized Bartlett score operator. For regression and +Bartlett scores the transform is the identity, so `alpha` does not affect the result. + +`prior_cov_alpha` is currently accepted for API symmetry with +[`predict_latent_scores`](@ref) and for forward compatibility, but it does not affect the +returned transform for the currently implemented score methods. 
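+
+A round-trip sketch (assuming `ar_scores` were computed with `method = :AndersonRubin`
+and the same `model`, `params`, and `alpha`):
+
+```julia
+transform = score_basis_transform(model, params; method = :AndersonRubin)
+orig_basis = transform(ar_scores; inverse = true)  # back to the model's latent basis
+transform(orig_basis) ≈ ar_scores                  # forward map restores the AR basis
+```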
+""" +function score_basis_transform( + method::SemScoresPredictMethod, + model::SemLoss, + params::AbstractVector; + latent_vars::Union{AbstractVector, Nothing} = nothing, + alpha::Number = 0, + prior_cov_alpha::Union{Number, Nothing} = nothing, +) + length(params) == nparams(model) || throw( + DimensionMismatch( + "The length of parameters vector ($(length(params))) does not match the number of parameters in the model ($(nparams(model))).", + ), + ) + alpha >= 0 || + throw(ArgumentError("The regularization parameter alpha must be non-negative")) + isnothing(prior_cov_alpha) || + prior_cov_alpha >= 0 || + throw( + ArgumentError( + "The regularization parameter prior_cov_alpha must be non-negative", + ), + ) + + implied = imply(model) + ram = implied.ram_matrices + lvar_inds = + check_var_indices(ram, latent_vars, allow_observed = false, normalize = true) + lvars = vars(ram)[lvar_inds] + if !(method isa SemAndersonRubinScores) # identity transform + return SemVariablesTransform(lvars, I, I) + end + + update!(EvaluationTargets(0.0, nothing, nothing), implied, params) + + A = materialize(ram.A, params) + S = materialize(ram.S, params) + T = float(promote_type(eltype(A), eltype(S), typeof(alpha))) + + I_A = Matrix{eltype(A)}(I, size(A, 1), size(A, 2)) - A + lv_I_A⁻¹ = inverse_rows(I_A, lvar_inds) + base_solver = + QRScoresSolver(implied, lvar_inds, A, S, lv_I_A⁻¹; alpha, prior_cov_alpha = 0) + nobs = nobserved_vars(implied) + base_op = permutedims(base_solver(Matrix{T}(I, nobs, nobs))) + score_cov = Symmetric(X_A_Xt(implied.Σ, base_op)) + score_cov_chol = cholesky!(score_cov) + + return SemVariablesTransform(lvars, I / score_cov_chol.U, score_cov_chol.U) +end + +score_basis_transform( + method::Symbol, + model::SemLoss, + params::Union{AbstractVector, Nothing} = nothing; + latent_vars::Union{AbstractVector, Nothing} = nothing, + alpha::Number = 0, + prior_cov_alpha::Union{Number, Nothing} = nothing, +) = score_basis_transform( + SemScoresPredictMethod(method), + model, + params; + latent_vars, + alpha, + prior_cov_alpha, +) + predict_latent_scores( fit::SemFit, data::SemObserved = observed(sem_term(fit.model)); diff --git a/test/unit_tests/predict_scores.jl b/test/unit_tests/predict_scores.jl index 47c3c5b6..9d6f9117 100644 --- a/test/unit_tests/predict_scores.jl +++ b/test/unit_tests/predict_scores.jl @@ -238,6 +238,31 @@ end @test ar_op * Σ * ar_op' ≈ Matrix{Float64}(I, size(ar_op, 1), size(ar_op, 1)) rtol = 1e-10 atol = 1e-10 + ar_transform = SEM.score_basis_transform( + model, + params; + method = :AndersonRubin, + latent_vars = lv_vars, + alpha = ar_alpha, + ) + @test ar_transform.vars == lv_vars + @test ar_transform.old_to_new * ar_transform.new_to_old ≈ Matrix{Float64}(I, 2, 2) atol = + 1e-10 rtol = 1e-10 + @test ar_transform(bartlett_scores) ≈ ar_scores rtol = 1e-10 atol = 1e-10 + @test ar_transform(ar_scores; inverse = true) ≈ bartlett_scores rtol = 1e-10 atol = + 1e-10 + + bartlett_transform = SEM.score_basis_transform( + model, + params; + method = :Bartlett, + latent_vars = lv_vars, + alpha = bartlett_alpha, + ) + @test bartlett_transform.old_to_new == I + @test bartlett_transform.new_to_old == I + @test bartlett_transform(bartlett_scores) ≈ bartlett_scores rtol = 1e-12 atol = 1e-12 + ar_scores_0 = SEM.predict_latent_scores( model, params,