diff --git a/demo/sensitivity/kucherenkoindices.jl b/demo/sensitivity/kucherenkoindices.jl new file mode 100644 index 000000000..6b1dd28f8 --- /dev/null +++ b/demo/sensitivity/kucherenkoindices.jl @@ -0,0 +1,103 @@ +using UncertaintyQuantification +using LinearAlgebra + +# Kucherenko et al. (2012) (DOI: 10.1016/j.cpc.2011.12.020) --- Test Case 2: Portfolio model with analytical indices --- + +μ = [0.0, 0.0, 250.0, 400.0] + +σ1, σ2, σ3, σ4 = sqrt(16.0), sqrt(4.0), sqrt(4e4), sqrt(9e4) +σ12, σ21 = 2.4, 2.4 +σ34, σ43 = -1.8e4, -1.8e4 + +Σ = [ + 16.0 2.4 0.0 0.0; + 2.4 4.0 0.0 0.0; + 0.0 0.0 4e4 -1.8e4; + 0.0 0.0 -1.8e4 9e4 +] + +# Marginals +marginals = RandomVariable[ + RandomVariable(Normal(μ[1], σ1), :x1), + RandomVariable(Normal(μ[2], σ2), :x2), + RandomVariable(Normal(μ[3], σ3), :x3), + RandomVariable(Normal(μ[4], σ4), :x4) +] + +# Correlation matrix for Gaussian copula +Dvec = sqrt.(diag(Σ)) +R = Σ ./ (Dvec * Dvec') + +inputs = [ + JointDistribution(GaussianCopula(R), marginals) +] + +model = Model(df -> df.x1 .* df.x3 .+ df.x2 .* df.x4, :y) +sim = MonteCarlo(200000) + + +# Analytical values +μ3, μ4 = μ[3], μ[4] +σ2_1, σ2_2, σ2_3, σ2_4 = 16.0, 4.0, 4e4, 9e4 +σ12, σ34 = 2.4, -1.8e4 +ρ12 = σ12 / (σ1 * σ2) +ρ34 = σ34 / (σ3 * σ4) + +D = σ2_1 * (σ2_3 + μ3^2) + σ2_2 * (σ2_4 + μ4^2) + 2 * σ12 * (σ34 + μ3 * μ4) + +S1_analytical = σ2_1 * (μ3 + μ4 * ρ12 * σ2 / σ1)^2 / D +ST1_analytical = σ2_1 * (1 - ρ12^2) * (σ2_3 + μ3^2) / D + +S2_analytical = σ2_2 * (μ4 + μ3 * ρ12 * σ1 / σ2)^2 / D +ST2_analytical = σ2_2 * (1 - ρ12^2) * (σ2_4 + μ4^2) / D + +S3_analytical = 0.0 +ST3_analytical = σ2_1 * σ2_3 * (1 - ρ34^2) / D + +S4_analytical = 0.0 +ST4_analytical = σ2_2 * σ2_4 * (1 - ρ34^2) / D + + + +try + indices, bin_samples = kucherenkoindices_bin([model], inputs, [:y], sim; min_bin_sample_multi_dims=5) + println("Sample-based Kucherenko Indices calculation using bins:") + println(indices) + + tol = 0.01 + @assert abs(indices.FirstOrder[1] - S1_analytical) < tol "S1 error too large" + @assert abs(indices.FirstOrder[2] - S2_analytical) < tol "S2 error too large" + @assert abs(indices.FirstOrder[3] - S3_analytical) < tol "S3 error too large" + @assert abs(indices.FirstOrder[4] - S4_analytical) < tol "S4 error too large" + @assert abs(indices.TotalEffect[1] - ST1_analytical) < tol "ST1 error too large" + @assert abs(indices.TotalEffect[2] - ST2_analytical) < tol "ST2 error too large" + @assert abs(indices.TotalEffect[3] - ST3_analytical) < tol "ST3 error too large" + @assert abs(indices.TotalEffect[4] - ST4_analytical) < tol "ST4 error too large" + + println("Portfolio model Kucherenko indices validation passed - all values within tolerance.") + +catch e + println("Error computing Portfolio model Kucherenko indices: $e") + rethrow(e) +end + +try + indices = indices = kucherenkoindices([model], inputs, [:y], sim) + println("Standard Kucherenko Indices calculation:") + println(indices) + + tol = 0.01 + @assert abs(indices.FirstOrder[1] - S1_analytical) < tol "S1 error too large" + @assert abs(indices.FirstOrder[2] - S2_analytical) < tol "S2 error too large" + @assert abs(indices.FirstOrder[3] - S3_analytical) < tol "S3 error too large" + @assert abs(indices.FirstOrder[4] - S4_analytical) < tol "S4 error too large" + @assert abs(indices.TotalEffect[1] - ST1_analytical) < tol "ST1 error too large" + @assert abs(indices.TotalEffect[2] - ST2_analytical) < tol "ST2 error too large" + @assert abs(indices.TotalEffect[3] - ST3_analytical) < tol "ST3 error too large" + @assert abs(indices.TotalEffect[4] - ST4_analytical) < tol "ST4 error too large" + + println("Portfolio model Kucherenko indices validation passed - all values within tolerance.") +catch e + println("Error computing Portfolio model Kucherenko indices: $e") + rethrow(e) +end diff --git a/src/UncertaintyQuantification.jl b/src/UncertaintyQuantification.jl index 76a498e84..5823f3190 100644 --- a/src/UncertaintyQuantification.jl +++ b/src/UncertaintyQuantification.jl @@ -167,7 +167,10 @@ export quadrature_nodes export quadrature_weights export rand export sample +export sample_conditional_copula export sobolindices +export kucherenkoindices +export kucherenkoindices_bin export to_copula_space export to_physical_space! export to_standard_normal_space @@ -220,6 +223,7 @@ include("simulations/importancesampling.jl") include("reliability/probabilityoffailure.jl") include("reliability/probabilityoffailure_imprecise.jl") include("sensitivity/sobolindices.jl") +include("sensitivity/kucherenkoindices.jl") include("util/fourier-transform.jl") include("util/wrap.jl") diff --git a/src/inputs/jointdistribution.jl b/src/inputs/jointdistribution.jl index 274b09762..9719bf7ac 100644 --- a/src/inputs/jointdistribution.jl +++ b/src/inputs/jointdistribution.jl @@ -43,7 +43,7 @@ struct JointDistribution{ function JointDistribution(d::Copula, m::Vector{<:RandomVariable}) dimensions(d) == length(m) || throw(ArgumentError("Dimension mismatch between copula and marginals.")) - return new{Copula,RandomVariable}(d, m) + return new{typeof(d),RandomVariable}(d, m) end # MultivariateDistribution + Symbol @@ -70,6 +70,55 @@ function sample(jd::JointDistribution{<:MultivariateDistribution,<:Symbol}, n::I return DataFrame(permutedims(rand(jd.d, n)), jd.m) end +function sample_conditional_copula(joint::JointDistribution{<:GaussianCopula,<:RandomVariable}, var_values::Vector{Tuple{Symbol,Float64}}, N::Int) + marginals, copula, R = joint.m, joint.d, joint.d.correlation + all_var_names, d = [marginal.name for marginal in marginals], length(marginals) + + v_indices = Int[] + for (var_name, _) in var_values + idx = findfirst(==(var_name), all_var_names) + idx === nothing && error("Variable $var_name not found in joint distribution. Available variables: $all_var_names") + push!(v_indices, idx) + end + + length(v_indices) >= d && error("All $(d) variables are fixed - need at least one free variable to sample") + + w_indices = setdiff(1:d, v_indices) + + z_v = zeros(length(var_values)) + for (i, (idx, (_, x_val))) in enumerate(zip(v_indices, var_values)) + z_v[i] = quantile(Normal(0,1), cdf(marginals[idx].dist, x_val)) + end + + Σ_vv, Σ_wv, Σ_vw, Σ_ww = R[v_indices, v_indices], R[w_indices, v_indices], R[v_indices, w_indices], R[w_indices, w_indices] + + if length(v_indices) == 1 + μ_cond = (Σ_wv / Σ_vv[1,1]) * z_v[1] + Σ_cond = Σ_ww .- (Σ_wv * Σ_vw) / Σ_vv[1,1] + else + Σ_vv_inv = inv(Σ_vv) + μ_cond = Σ_wv * Σ_vv_inv * z_v + Σ_cond = Σ_ww .- Σ_wv * Σ_vv_inv * Σ_vw + end + + μ_cond = vec(μ_cond) + Z_w = rand(MvNormal(μ_cond, Symmetric(Σ_cond)), N)' + + samples = DataFrame() + for (var_name, x_val) in var_values + samples[!, var_name] = fill(x_val, N) + end + for (i, idx) in enumerate(w_indices) + samples[!, all_var_names[idx]] = quantile.(marginals[idx].dist, cdf.(Normal(), Z_w[:,i])) + end + + return samples +end + +function sample_conditional_copula(joint::JointDistribution, var_name::Symbol, x_v::Float64, N::Int) + return sample_conditional_copula(joint, [(var_name, x_v)], N) +end + function to_physical_space!(jd::JointDistribution{<:Copula,<:RandomVariable}, x::DataFrame) correlated_cdf = to_copula_space(jd.d, Matrix{Float64}(x[:, names(jd)])) for (i, rv) in enumerate(jd.m) diff --git a/src/sensitivity/kucherenkoindices.jl b/src/sensitivity/kucherenkoindices.jl new file mode 100644 index 000000000..8e90f9d43 --- /dev/null +++ b/src/sensitivity/kucherenkoindices.jl @@ -0,0 +1,394 @@ +const _kucherenko_table_types = [Symbol[], Float64[], Float64[]] +const _kucherenko_table_header = [ + "Variables", "FirstOrder", "TotalEffect" +] + +""" +- `models::Vector{<:UQModel}`: Vector of UQ models to evaluate +- `inputs::Vector{<:UQInput}`: Vector of input distributions +- `output::Symbol`: Output quantity name +- `sim::AbstractMonteCarlo`: == N : Total MC samples = N*(2M+1) +""" +function kucherenkoindices( + models::Vector{<:UQModel}, + inputs::Vector{<:UQInput}, + output::Symbol, + sim::AbstractMonteCarlo +) + indices = DataFrame(_kucherenko_table_types, _kucherenko_table_header) + + samples = sample(inputs, sim) + evaluate!(models, samples) + + random_names = names(filter(i -> isa(i, RandomUQInput), inputs)) + + Y_orig = Vector(samples[:, output]) + total_var = var(Y_orig) + + for i in random_names + + i_cond_samples = _generate_conditional_samples(samples, inputs[1], [i]) + evaluate!(models, i_cond_samples) + S_i = _compute_first_order_kucherenko(samples, i_cond_samples, output, total_var) + + other_vars = setdiff(random_names, [i]) + other_cond_samples = _generate_conditional_samples(samples, inputs[1], other_vars) + evaluate!(models, other_cond_samples) + ST_i = length(random_names) == 1 ? S_i : _compute_total_effect_kucherenko(samples, other_cond_samples, output, total_var) + + push!(indices, [i, S_i, ST_i]) + end + + return indices +end + + +function _compute_first_order_kucherenko( + samples::DataFrame, + cond_samples::DataFrame, + output::Symbol, + total_var::Float64 +) + Y_orig = Vector(samples[:, output]) + Y_cond = Vector(cond_samples[:, output]) + + S_i = (mean(Y_orig .* Y_cond) - mean(Y_orig)^2) / total_var # Kucherenko et. al. 2012 Eq. 5.3 + + return S_i +end + +function _compute_total_effect_kucherenko( + samples::DataFrame, + cond_samples::DataFrame, + output::Symbol, + total_var::Float64 +) + Y_orig = Vector(samples[:, output]) + Y_cond = Vector(cond_samples[:, output]) + + ST_i = mean((Y_orig .- Y_cond).^2) / (2 * total_var) # Kucherenko et. al. 2012 Eq. 5.4 + + return ST_i +end + +function _generate_conditional_samples( + samples::DataFrame, + joint_dist::JointDistribution, + var_names::Vector{Symbol} +) + input_var_names = [marginal.name for marginal in joint_dist.m] + conditional_samples = map(1:nrow(samples)) do i + x_values = [samples[i, var_name] for var_name in var_names] + var_value_tuples = [(var_names[j], x_values[j]) for j in 1:length(var_names)] + sample_conditional_copula(joint_dist, var_value_tuples, 1) + end + + result = vcat(conditional_samples...) + return select(result, input_var_names) +end + +""" +- `models::Vector{<:UQModel}`: Vector of UQ models to evaluate +- `inputs::Vector{<:UQInput}`: Vector of input distributions +- `outputs::Vector{Symbol}`: Vector of output quantity names +- `sim::AbstractMonteCarlo`: == N : Total MC samples = N*(2M+1) +""" +function kucherenkoindices( + models::Vector{<:UQModel}, + inputs::Vector{<:UQInput}, + outputs::Vector{Symbol}, + sim::AbstractMonteCarlo +) + + random_names = names(filter(i -> isa(i, RandomUQInput), inputs)) + + indices = Dict([ + (name, DataFrame(_kucherenko_table_types, _kucherenko_table_header)) for name in outputs + ]) + + for output in outputs + + output_indices = kucherenkoindices(models, inputs, output, sim) + + for (i, var_name) in enumerate(random_names) + output_indices[i, :Variables] = var_name + end + + indices[output] = output_indices + end + + return length(outputs) > 1 ? indices : indices[outputs[1]] +end + +function kucherenkoindices( + models::Vector{<:UQModel}, + inputs::UQInput, + outputs::Vector{Symbol}, + sim::AbstractMonteCarlo +) + return kucherenkoindices(models, [inputs], outputs, sim) +end + + +function kucherenkoindices( + models::UQModel, + inputs::Vector{<:UQInput}, + outputs::Symbol, + sim::AbstractMonteCarlo +) + return kucherenkoindices([models], inputs, [outputs], sim) +end + +function kucherenkoindices( + models::Vector{<:UQModel}, + inputs::UQInput, + outputs::Symbol, + sim::AbstractMonteCarlo +) + return kucherenkoindices(models, [inputs], [outputs], sim) +end + +function kucherenkoindices( + models::UQModel, + inputs::UQInput, + outputs::Symbol, + sim::AbstractMonteCarlo +) + return kucherenkoindices([models], [inputs], [outputs], sim) +end + +function kucherenkoindices( + models::UQModel, + inputs::Vector{<:UQInput}, + outputs::Vector{Symbol}, + sim::AbstractMonteCarlo +) + return kucherenkoindices([models], inputs, outputs, sim) +end + + + +""" +- `X::Matrix`: Input sample matrix where each row is a sample point and each column is a variable +- `Y::Vector`: Output sample vector corresponding to the input samples +- `min_bin_sample::Int=25`: Minimum samples per bin for 1 dimension for conditioning; Recommended amount is at least 25 samples per bin with bin amount around 100 +- `min_bin_sample_multi_dims::Int=25`: Minimum samples per bin-dimension in multiple dimensions for conditioning; Recommended amount is about 10-25 samples per bin +""" +function kucherenkoindices_bin(X::Matrix, Y::Vector; min_bin_sample=nothing, min_bin_sample_multi_dims::Int=25) + n_samples, n_vars = size(X) + + num_bins = min_bin_sample === nothing ? min(100, floor(Int, n_samples / 25)) : floor(Int, n_samples / min_bin_sample) + num_bins_multi = floor(Int, n_samples / min_bin_sample_multi_dims) + + if num_bins > 500 @warn "More than 500 bins in single dimension: $num_bins" end + if length(Y) != n_samples throw(ArgumentError("Number of output samples must match number of input samples")) end + + indices = DataFrame(_kucherenko_table_types, _kucherenko_table_header) + total_var = var(Y) + + for i in 1:n_vars + S_i = _compute_first_order_kucherenko_bins(X, Y, i, num_bins, total_var) + + ST_i = n_vars == 1 ? S_i : _compute_total_effect_kucherenko_bins(X, Y, i, num_bins_multi, total_var) + + push!(indices, [Symbol("X$i"), S_i, ST_i]) + end + + return indices +end + +function _compute_first_order_kucherenko_bins(X::Matrix, Y::Vector, var_idx::Int, num_bins::Int, total_var::Float64) + n_samples = length(Y) + x_var = X[:, var_idx] + + quantiles = range(0, 1; length=num_bins+1) + bin_edges = quantile(x_var, quantiles) + + bin_means = Float64[] + bin_weights = Float64[] + + for j in 1:num_bins + if j == num_bins + mask = (x_var .>= bin_edges[j]) .& (x_var .<= bin_edges[j+1]) + else + mask = (x_var .>= bin_edges[j]) .& (x_var .< bin_edges[j+1]) + end + + n_bin = sum(mask) + + if n_bin > 0 + bin_mean = mean(Y[mask]) + push!(bin_means, bin_mean) + push!(bin_weights, n_bin / n_samples) + end + end + + if isempty(bin_means) + return 0.0 + end + + weighted_mean = sum(bin_weights .* bin_means) + + S_i = sum(bin_weights .* (bin_means .- weighted_mean).^2) / total_var + + return S_i +end + +function _compute_total_effect_kucherenko_bins(X::Matrix, Y::Vector, var_idx::Int, num_bins::Int, total_var::Float64) + n_samples, n_vars = size(X) + + other_vars = setdiff(1:n_vars, var_idx) + X_other = X[:, other_vars] + + bin_assignments = _assign_multidimensional_bins(X_other, num_bins) + + weighted_var_sum = 0.0 + total_weight = 0.0 + + for bin_id in unique(bin_assignments) + mask = bin_assignments .== bin_id + n_bin = sum(mask) + + if n_bin > 1 + bin_var = var(Y[mask]) + weight = n_bin / n_samples + weighted_var_sum += weight * bin_var + total_weight += weight + end + end + + ST_i = total_weight > 0 ? weighted_var_sum / total_var : 0.0 + + return ST_i +end + +function _assign_multidimensional_bins(X::Matrix, num_bins::Int) + n_samples, n_dims = size(X) + bins_per_dim = floor(Int, num_bins^(1/n_dims)) + + quantile_edges = [quantile(X[:, j], range(0, 1; length=bins_per_dim+1)) for j in 1:n_dims] + + bin_assignments = zeros(Int, n_samples) + + for i in 1:n_samples + bin_id = 0 + for j in 1:n_dims + edges = quantile_edges[j] + x = X[i, j] + bin_idx = searchsortedlast(edges, x) + bin_idx = clamp(bin_idx, 1, bins_per_dim) + bin_id += (bin_idx - 1) * (bins_per_dim ^ (j - 1)) + end + bin_assignments[i] = bin_id + 1 + end + + return bin_assignments +end + +""" +- `models::Vector{<:UQModel}`: Vector of UQ models to evaluate +- `inputs::Vector{<:UQInput}`: Vector of input distributions +- `outputs::Vector{Symbol}`: Vector of output quantity names +- `sim::AbstractMonteCarlo`: Total MC samples +- `min_bin_sample::Int=25`: Minimum samples per bin for 1 dimension for conditioning; Recommended amount is at least 25 samples per bin but bin amount not higher than 100 +- `min_bin_sample_multi_dims::Int=25`: Minimum samples per bin-dimension in multiple dimensions for conditioning; Recommended amount is about 10-25 samples per bin +""" +function kucherenkoindices_bin( + models::Vector{<:UQModel}, + inputs::Vector{<:UQInput}, + outputs::Vector{Symbol}, + sim::AbstractMonteCarlo; + min_bin_sample=nothing, + min_bin_sample_multi_dims::Int=25 +) + samples = sample(inputs, sim) + evaluate!(models, samples) + + random_names = names(filter(i -> isa(i, RandomUQInput), inputs)) + + indices = Dict([ + (name, DataFrame(_kucherenko_table_types, _kucherenko_table_header)) for name in outputs + ]) + + X = Matrix(samples[:, random_names]) + + for output in outputs + Y = Vector(samples[:, output]) + + output_indices = kucherenkoindices_bin(X, Y; min_bin_sample = min_bin_sample, min_bin_sample_multi_dims = min_bin_sample_multi_dims) + + for (i, var_name) in enumerate(random_names) + output_indices[i, :Variables] = var_name + end + + indices[output] = output_indices + end + + return length(outputs) > 1 ? indices : indices[outputs[1]], samples +end + +function kucherenkoindices_bin( + models::Vector{<:UQModel}, + inputs::UQInput, + outputs::Vector{Symbol}, + sim::AbstractMonteCarlo; + min_bin_sample=nothing, + min_bin_sample_multi_dims::Int=25 +) + return kucherenkoindices_bin(models, [inputs], outputs, sim; min_bin_sample=min_bin_sample, min_bin_sample_multi_dims=min_bin_sample_multi_dims) +end + +function kucherenkoindices_bin( + models::Vector{<:UQModel}, + inputs::Vector{<:UQInput}, + outputs::Symbol, + sim::AbstractMonteCarlo; + min_bin_sample=nothing, + min_bin_sample_multi_dims::Int=25 +) + return kucherenkoindices_bin(models, inputs, [outputs], sim; min_bin_sample=min_bin_sample, min_bin_sample_multi_dims=min_bin_sample_multi_dims) +end + +function kucherenkoindices_bin( + models::UQModel, + inputs::Vector{<:UQInput}, + outputs::Symbol, + sim::AbstractMonteCarlo; + min_bin_sample=nothing, + min_bin_sample_multi_dims::Int=25 +) + return kucherenkoindices_bin([models], inputs, [outputs], sim; min_bin_sample=min_bin_sample, min_bin_sample_multi_dims=min_bin_sample_multi_dims) +end + +function kucherenkoindices_bin( + models::Vector{<:UQModel}, + inputs::UQInput, + outputs::Symbol, + sim::AbstractMonteCarlo; + min_bin_sample=nothing, + min_bin_sample_multi_dims::Int=25 +) + return kucherenkoindices_bin(models, [inputs], [outputs], sim; min_bin_sample=min_bin_sample, min_bin_sample_multi_dims=min_bin_sample_multi_dims) +end + +function kucherenkoindices_bin( + models::UQModel, + inputs::UQInput, + outputs::Symbol, + sim::AbstractMonteCarlo; + min_bin_sample=nothing, + min_bin_sample_multi_dims::Int=25 +) + return kucherenkoindices_bin([models], [inputs], [outputs], sim; min_bin_sample=min_bin_sample, min_bin_sample_multi_dims=min_bin_sample_multi_dims) +end + +function kucherenkoindices_bin( + models::UQModel, + inputs::Vector{<:UQInput}, + outputs::Vector{Symbol}, + sim::AbstractMonteCarlo; + min_bin_sample=nothing, + min_bin_sample_multi_dims::Int=25 +) + return kucherenkoindices_bin([models], inputs, outputs, sim; min_bin_sample=min_bin_sample, min_bin_sample_multi_dims=min_bin_sample_multi_dims) +end diff --git a/test/Project.toml b/test/Project.toml index f36f601fe..0e4fef12b 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -4,6 +4,7 @@ Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" HCubature = "19dc6840-f33b-545b-b366-655c7e3ffd49" HypothesisTests = "09f84164-cd44-5f33-b23f-e6b0d136a0d5" InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240" +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" QuasiMonteCarlo = "8a4e6c94-4038-4cdc-81c3-7e6ffdb2a71b" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Revise = "295af30f-e4ad-537b-8983-00126c2a3abe" diff --git a/test/inputs/copulas/gaussian.jl b/test/inputs/copulas/gaussian.jl index 0dd0b358a..803e80695 100644 --- a/test/inputs/copulas/gaussian.jl +++ b/test/inputs/copulas/gaussian.jl @@ -13,6 +13,26 @@ correlation = [1 0.7071; 0.7071 1] @test corkendall(u[:, 1], u[:, 2]) ≈ 0.5 atol = 0.01 end + @testset "sample_conditional_copula" begin + marginals = [RandomVariable(Normal(0,1), :x1), RandomVariable(Normal(0,1), :x2), RandomVariable(Normal(0,1), :x3)] + joint_dist = JointDistribution(GaussianCopula([1 0.7071 0.5; 0.7071 1 0.3; 0.5 0.3 1]), marginals) + unconditional = sample(joint_dist, 500000) + + x_val = 0.5 + cond1 = sample_conditional_copula(joint_dist, [(:x1, x_val)], 10000) + filtered1 = unconditional[abs.(unconditional[:, :x1] .- x_val) .< 0.1, :] + @test mean(filtered1[:, :x2]) ≈ mean(cond1[:, :x2]) rtol=0.1 + @test std(filtered1[:, :x2]) ≈ std(cond1[:, :x2]) rtol=0.1 + @test mean(filtered1[:, :x3]) ≈ mean(cond1[:, :x3]) rtol=0.1 + @test std(filtered1[:, :x3]) ≈ std(cond1[:, :x3]) rtol=0.1 + + x_val2, x_val3 = -0.5, 0.2 + cond2 = sample_conditional_copula(joint_dist, [(:x2, x_val2), (:x3, x_val3)], 10000) + filtered2 = unconditional[(abs.(unconditional[:, :x2] .- x_val2) .< 0.15) .& (abs.(unconditional[:, :x3] .- x_val3) .< 0.15), :] + @test mean(filtered2[:, :x1]) ≈ mean(cond2[:, :x1]) rtol=0.15 + @test std(filtered2[:, :x1]) ≈ std(cond2[:, :x1]) rtol=0.15 + end + @testset "to_standard_normal_space" begin u = sample(copula, 10^5) s = to_standard_normal_space(copula, u) diff --git a/test/runtests.jl b/test/runtests.jl index 00a02afe8..1a7bc002f 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -6,6 +6,7 @@ using InteractiveUtils using QuasiMonteCarlo using Random using StatsBase: fit, Histogram, corkendall +using LinearAlgebra: diag using Test using UncertaintyQuantification @@ -41,6 +42,7 @@ include("reliability/probabilityoffailure_imprecise.jl") include("sensitivity/gradient.jl") include("sensitivity/sobolindices.jl") +include("sensitivity/kucherenkoindices.jl") include("simulations/doe.jl") include("simulations/montecarlo.jl") diff --git a/test/sensitivity/kucherenkoindices.jl b/test/sensitivity/kucherenkoindices.jl new file mode 100644 index 000000000..a7a0c691c --- /dev/null +++ b/test/sensitivity/kucherenkoindices.jl @@ -0,0 +1,144 @@ +@testset "kucherenkoindices" begin + # Testing Kucherenko indices - Test Case 1: Linear model with correlated variables from Kucherenko et al. (2012) (DOI: 10.1016/j.cpc.2011.12.020) + ρ, σ = 0.5, 2.0 + Σ = [1.0 0.0 0.0; 0.0 1.0 ρ*σ; 0.0 ρ*σ σ^2] + R = Σ ./ (sqrt.(diag(Σ)) * sqrt.(diag(Σ))') + + marginals = RandomVariable[ + RandomVariable(Normal(0, 1), :x1), + RandomVariable(Normal(0, 1), :x2), + RandomVariable(Normal(0, σ), :x3) + ] + + inputs = [ JointDistribution(GaussianCopula(R), marginals) ] + model = Model(df -> df.x1 .+ df.x2 .+ df.x3, :y) + sim = MonteCarlo(50000) + + # Analytical calculations + denom = 2 + σ^2 + 2*ρ*σ + firstorder_analytical = [1 / denom, (1 + ρ*σ)^2 / denom, (σ + ρ)^2 / denom] + totaleffect_analytical = [1 / denom, (1 - ρ^2) / denom, (σ^2 * (1 - ρ^2)) / denom] + + model_samples = sample(inputs, sim) + evaluate!(model, model_samples) + + + @testset "Standard Kucherenko Indices" begin + indices = kucherenkoindices([model], inputs, [:y], sim) + + @test indices.FirstOrder ≈ firstorder_analytical rtol = 0.1 + @test indices.TotalEffect ≈ totaleffect_analytical rtol = 0.1 + end + + + @testset "Standard Kucherenko Indices - First Order" begin + random_names = names(filter(i -> isa(i, RandomUQInput), inputs)) + Y_orig = Vector(model_samples[:, :y]) + total_var = var(Y_orig) + + S_i = zeros(length(random_names)) + + for (j, i) in enumerate(random_names) + i_cond_samples = UncertaintyQuantification._generate_conditional_samples(model_samples, inputs[1], [i]) + evaluate!(model, i_cond_samples) + S_i[j] = UncertaintyQuantification._compute_first_order_kucherenko(model_samples, i_cond_samples, :y, total_var) + end + @test S_i ≈ firstorder_analytical rtol = 0.1 + end + + @testset "Stnadard Kucherenko Indices - Total Effect" begin + random_names = names(filter(i -> isa(i, RandomUQInput), inputs)) + Y_orig = Vector(model_samples[:, :y]) + total_var = var(Y_orig) + + ST_i = zeros(length(random_names)) + + for (j, i) in enumerate(random_names) + not_i_cond_samples = UncertaintyQuantification._generate_conditional_samples(model_samples, inputs[1], setdiff(random_names, [i])) + evaluate!(model, not_i_cond_samples) + ST_i[j] = UncertaintyQuantification._compute_total_effect_kucherenko(model_samples, not_i_cond_samples, :y, total_var) + end + @test ST_i ≈ totaleffect_analytical rtol = 0.1 + end + + @testset "_generate_conditional_samples" begin + marginals = [RandomVariable(Normal(0,1), :x1), RandomVariable(Normal(0,1), :x2)] + R = [1.0 0.5; 0.5 1.0] + joint_dist = JointDistribution(GaussianCopula(R), marginals) + samples_df = DataFrame(sample([joint_dist], MonteCarlo(100))) + cond_samples = UncertaintyQuantification._generate_conditional_samples(samples_df, joint_dist, [:x1]) + @test names(cond_samples) == ["x1", "x2"] + @test nrow(cond_samples) == 100 + @test cond_samples[!, :x1] ≈ samples_df[!, :x1] + end + + @testset "Kucherenko Indices with bins" begin + indices, bin_samples = kucherenkoindices_bin([model], inputs, [:y], sim; min_bin_sample_multi_dims=10) + + @test indices.FirstOrder ≈ firstorder_analytical rtol = 0.1 + @test indices.TotalEffect ≈ totaleffect_analytical rtol = 0.1 + end + + @testset "Kucherenko Indices with bins - Existing Samples" begin + random_names = names(filter(i -> isa(i, RandomUQInput), inputs)) + X = Matrix(model_samples[:, random_names]) + Y = Vector(model_samples[:, :y]) + + indices = kucherenkoindices_bin(X, Y; min_bin_sample_multi_dims=10) + + @test indices.FirstOrder ≈ firstorder_analytical rtol = 0.1 + @test indices.TotalEffect ≈ totaleffect_analytical rtol = 0.1 + end + + @testset "Kucherenko Indices with bins - First Order" begin + random_names = names(filter(i -> isa(i, RandomUQInput), inputs)) + X = Matrix(model_samples[:, random_names]) + Y = Vector(model_samples[:, :y]) + n_samples, n_vars = size(X) + total_var = var(Y) + + S_i = zeros(n_vars) + for i in 1:n_vars + S_i[i] = UncertaintyQuantification._compute_first_order_kucherenko_bins(X, Y, i, min(100, floor(Int, n_samples / 25)), total_var) + end + @test S_i ≈ firstorder_analytical rtol = 0.1 + end + + @testset "Kucherenko Indices with bins - Total Order" begin + random_names = names(filter(i -> isa(i, RandomUQInput), inputs)) + X = Matrix(model_samples[:, random_names]) + Y = Vector(model_samples[:, :y]) + n_samples, n_vars = size(X) + total_var = var(Y) + + ST_i = zeros(n_vars) + for i in 1:n_vars + ST_i[i] = UncertaintyQuantification._compute_total_effect_kucherenko_bins(X, Y, i, floor(Int, n_samples / 25), total_var) + end + @test ST_i ≈ totaleffect_analytical rtol = 0.1 + end + + @testset "_assign_multidimensional_bins" begin + X = [1 10 100; + 2 20 110; + 3 30 120; + 4 40 130; + 5 50 140; + 6 60 150; + 7 70 160; + 8 80 170; + 9 90 180; + 10 100 190; + 11 110 200; + 12 120 210] + num_bins = 8 + bin_assignments = UncertaintyQuantification._assign_multidimensional_bins(X, num_bins) + @test length(bin_assignments) == size(X, 1) + @test all(bin_assignments .>= 1) + @test all(bin_assignments .<= num_bins) + @test bin_assignments[1] == bin_assignments[2] + @test bin_assignments[end-1] == bin_assignments[end] + @test bin_assignments[1] != bin_assignments[end] + end + +end \ No newline at end of file