Skip to content
Open
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
TaijaBase = "10284c91-9f28-4c9a-abbf-ee43576dfff6"

[compat]
Aqua = "0.8"
Expand All @@ -39,6 +40,7 @@ ProgressMeter = "1"
Random = "1.7, 1.8, 1.9, 1.10"
StatsBase = "0.33, 0.34.0"
Tables = "1"
TaijaBase = "1"
Test = "1.7, 1.8, 1.9, 1.10"
julia = "1.7, 1.8, 1.9, 1.10"

Expand Down
2 changes: 2 additions & 0 deletions src/ConformalPrediction.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
module ConformalPrediction

using TaijaBase

# Conformal Models:
include("conformal_models/conformal_models.jl")
export ConformalModel
Expand Down
13 changes: 3 additions & 10 deletions src/conformal_models/conformal_models.jl
Original file line number Diff line number Diff line change
Expand Up @@ -50,26 +50,19 @@ function conformal_model(
return conf_model
end

# Inductive Models:
include("inductive/inductive_models.jl")

# Regression Models:
include("inductive_regression.jl")
include("transductive_regression.jl")

# Classification Models
include("inductive_classification.jl")
include("transductive_classification.jl")

# Training:
include("ConformalTraining/ConformalTraining.jl")
using .ConformalTraining

# Type unions:
# `InductiveModel` is an alias for the union of the four conformal model types
# listed below.
const InductiveModel = Union{
SimpleInductiveRegressor,
SimpleInductiveClassifier,
AdaptiveInductiveClassifier,
ConformalQuantileRegressor,
}

const TransductiveModel = Union{
NaiveRegressor,
JackknifeRegressor,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,50 +1,36 @@
"""
score(conf_model::ConformalProbabilisticSet, fitresult, X, y=nothing)

Generic score method for the [`ConformalProbabilisticSet`](@ref). It computes nonconformity scores using the heuristic function `h` and the softmax probabilities of the true class. Method is dispatched for different Conformal Probabilistic Sets and atomic models.
"""
function score(conf_model::ConformalProbabilisticSet, fitresult, X, y=nothing)
return score(conf_model, conf_model.model, fitresult, X, y)
end

"""
split_data(conf_model::ConformalProbabilisticSet, indices::Base.OneTo{Int})

Splits the data into a proper training and calibration set.
"""
function split_data(conf_model::ConformalProbabilisticSet, X, y)
train, calibration = partition(eachindex(y), conf_model.train_ratio)
Xtrain = selectrows(X, train)
ytrain = y[train]
Xcal = selectrows(X, calibration)
ycal = y[calibration]

return Xtrain, ytrain, Xcal, ycal
end

# Simple
"The `SimpleInductiveClassifier` is the simplest approach to Inductive Conformal Classification. Contrary to the [`NaiveClassifier`](@ref) it computes nonconformity scores using a designated calibration dataset."
mutable struct SimpleInductiveClassifier{Model<:Supervised} <: ConformalProbabilisticSet
model::Model
coverage::AbstractFloat
scores::Union{Nothing,Dict{Any,Any}}
heuristic::Function
parallelizer::Union{Nothing,AbstractParallelizer}
train_ratio::AbstractFloat
end

# Keyword constructor for `SimpleInductiveClassifier`.
#
# Wraps the atomic `model` and forwards the keyword settings to the positional
# constructor. The `scores` field (third positional argument) starts out as
# `nothing`.
function SimpleInductiveClassifier(
    model::Supervised;
    coverage::AbstractFloat=0.95,
    heuristic::Function=minus_softmax,
    parallelizer::Union{Nothing,AbstractParallelizer}=nothing,
    train_ratio::AbstractFloat=0.5,
)
    # The struct has six fields, so `parallelizer` must be passed through. The
    # stale five-argument call that preceded this return dropped it and did not
    # match any positional constructor.
    return SimpleInductiveClassifier(
        model, coverage, nothing, heuristic, parallelizer, train_ratio
    )
end

"""
@doc raw"""
score(conf_model::SimpleInductiveClassifier, ::Type{<:Supervised}, fitresult, X, y::Union{Nothing,AbstractArray}=nothing)

Score method for the [`SimpleInductiveClassifier`](@ref) dispatched for any `<:Supervised` model.
Score method for the [`SimpleInductiveClassifier`](@ref) dispatched for any `<:Supervised` model. For the [`SimpleInductiveClassifier`](@ref) nonconformity scores are computed as follows:

``
S_i^{\text{CAL}} = s(X_i, Y_i) = h(\hat\mu(X_i), Y_i), \ i \in \mathcal{D}_{\text{calibration}}
``

A typical choice for the heuristic function is ``h(\hat\mu(X_i), Y_i)=1-\hat\mu(X_i)_{Y_i}`` where ``\hat\mu(X_i)_{Y_i}`` denotes the softmax output of the true class and ``\hat\mu`` denotes the model fitted on training data ``\mathcal{D}_{\text{train}}``. The simple approach only takes the softmax probability of the true label into account.
"""
function score(
conf_model::SimpleInductiveClassifier, atomic::Supervised, fitresult, X, y=nothing
Expand All @@ -61,34 +47,6 @@ function score(
end
end

@doc raw"""
    MMI.fit(conf_model::SimpleInductiveClassifier, verbosity, X, y)

Fit the atomic model on the proper training set and compute the nonconformity scores of the [`SimpleInductiveClassifier`](@ref) on the calibration set:

``
S_i^{\text{CAL}} = s(X_i, Y_i) = h(\hat\mu(X_i), Y_i), \ i \in \mathcal{D}_{\text{calibration}}
``

The heuristic function is typically chosen as ``h(\hat\mu(X_i), Y_i)=1-\hat\mu(X_i)_{Y_i}``, where ``\hat\mu(X_i)_{Y_i}`` is the softmax output of the true class and ``\hat\mu`` is the model fitted on the training data ``\mathcal{D}_{\text{train}}``. Only the softmax probability of the true label enters the score.

Returns the tuple `(fitresult, cache, report)` produced by fitting the atomic model; the scores are stored in `conf_model.scores`.
"""
function MMI.fit(conf_model::SimpleInductiveClassifier, verbosity, X, y)

    # Hold out part of the data for calibration:
    X_fit, y_fit, X_calib, y_calib = split_data(conf_model, X, y)

    # Fit the atomic model on the proper training set only:
    atomic = conf_model.model
    train_args = MMI.reformat(atomic, X_fit, y_fit)
    fitresult, cache, report = MMI.fit(atomic, verbosity, train_args...)

    # Score the calibration data and keep the result on the conformal model:
    calibration_scores, all_scores = score(conf_model, fitresult, X_calib, y_calib)
    conf_model.scores = Dict(:calibration => calibration_scores, :all => all_scores)

    return (fitresult, cache, report)
end

@doc raw"""
MMI.predict(conf_model::SimpleInductiveClassifier, fitresult, Xnew)

Expand Down Expand Up @@ -127,42 +85,20 @@ mutable struct AdaptiveInductiveClassifier{Model<:Supervised} <: ConformalProbab
coverage::AbstractFloat
scores::Union{Nothing,Dict{Any,Any}}
heuristic::Function
parallelizer::Union{Nothing,AbstractParallelizer}
train_ratio::AbstractFloat
end

# Keyword constructor for `AdaptiveInductiveClassifier`.
#
# Wraps the atomic `model` and forwards the keyword settings to the positional
# constructor. The `scores` field (third positional argument) starts out as
# `nothing`.
function AdaptiveInductiveClassifier(
    model::Supervised;
    coverage::AbstractFloat=0.95,
    heuristic::Function=minus_softmax,
    parallelizer::Union{Nothing,AbstractParallelizer}=nothing,
    train_ratio::AbstractFloat=0.5,
)
    # `parallelizer` is a struct field and has to be passed along; the previous
    # five-argument call silently dropped it and matched no positional
    # constructor.
    return AdaptiveInductiveClassifier(
        model, coverage, nothing, heuristic, parallelizer, train_ratio
    )
end

@doc raw"""
    MMI.fit(conf_model::AdaptiveInductiveClassifier, verbosity, X, y)

For the [`AdaptiveInductiveClassifier`](@ref) nonconformity scores are computed by cumulatively summing the ranked scores of each label in descending order until reaching the true label ``Y_i``:

``
S_i^{\text{CAL}} = s(X_i,Y_i) = \sum_{j=1}^k  \hat\mu(X_i)_{\pi_j} \ \text{where } \ Y_i=\pi_k,  i \in \mathcal{D}_{\text{calibration}}
``

Returns the tuple `(fitresult, cache, report)` produced by fitting the atomic model; the scores are stored in `conf_model.scores` under the keys `:calibration` and `:all`.
"""
function MMI.fit(conf_model::AdaptiveInductiveClassifier, verbosity, X, y)

    # Data Splitting:
    Xtrain, ytrain, Xcal, ycal = split_data(conf_model, X, y)

    # Training (the atomic model only sees the proper training set):
    fitresult, cache, report = MMI.fit(
        conf_model.model, verbosity, MMI.reformat(conf_model.model, Xtrain, ytrain)...
    )

    # Nonconformity Scores:
    cal_scores, scores = score(conf_model, fitresult, Xcal, ycal)
    conf_model.scores = Dict(:calibration => cal_scores, :all => scores)

    return (fitresult, cache, report)
end

"""
Expand Down
57 changes: 57 additions & 0 deletions src/conformal_models/inductive/inductive_models.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# Included before the union below, which refers to model types by name
# (presumably defined in these two files — confirm):
include("classification.jl")
include("regression.jl")

# Type unions:
# `InductiveModel` is an alias for the union of the four conformal model types
# listed below; the generic methods further down dispatch on it.
const InductiveModel = Union{
SimpleInductiveRegressor,
SimpleInductiveClassifier,
AdaptiveInductiveClassifier,
ConformalQuantileRegressor,
}

"""
split_data(conf_model::InductiveModel, indices::Base.OneTo{Int})

Splits the data into a proper training and calibration set for inductive models.
"""
function split_data(conf_model::InductiveModel, X, y)

train, calibration = partition(eachindex(y), conf_model.train_ratio)
Xtrain = selectrows(X, train)
ytrain = y[train]
Xcal = selectrows(X, calibration)
ycal = y[calibration]

return Xtrain, ytrain, Xcal, ycal
end

"""
score(conf_model::InductiveModel, fitresult, X, y=nothing)

Generic score method for the [`InductiveModel`](@ref). It computes nonconformity scores using the heuristic function `h` and the softmax probabilities of the true class. Method is dispatched for different Conformal Probabilistic Sets and atomic models.
"""
function score(conf_model::InductiveModel, fitresult, X, y=nothing)
return score(conf_model, conf_model.model, fitresult, X, y)
end

@doc raw"""
    MMI.fit(conf_model::InductiveModel, verbosity, X, y)

Shared fitting routine for the [`InductiveModel`](@ref) types: split the data, fit the atomic model on the training part, score the calibration part and store the scores on `conf_model`.

Returns the tuple `(fitresult, cache, report)` produced by fitting the atomic model.
"""
function MMI.fit(conf_model::InductiveModel, verbosity, X, y)

    # Hold out part of the data for calibration:
    X_fit, y_fit, X_calib, y_calib = split_data(conf_model, X, y)

    # Fit the atomic model on the proper training set only:
    atomic = conf_model.model
    train_args = MMI.reformat(atomic, X_fit, y_fit)
    fitresult, cache, report = MMI.fit(atomic, verbosity, train_args...)

    # Score the calibration data and keep the result on the conformal model.
    # NOTE(review): a `Dict` is stored here for every `InductiveModel`; confirm
    # this is compatible with model types whose `scores` field is declared as
    # `Union{Nothing,AbstractArray}`.
    calibration_scores, all_scores = score(conf_model, fitresult, X_calib, y_calib)
    conf_model.scores = Dict(:calibration => calibration_scores, :all => all_scores)

    return (fitresult, cache, report)
end
Original file line number Diff line number Diff line change
Expand Up @@ -6,20 +6,24 @@ mutable struct SimpleInductiveRegressor{Model<:Supervised} <: ConformalInterval
coverage::AbstractFloat
scores::Union{Nothing,AbstractArray}
heuristic::Function
parallelizer::Union{Nothing,AbstractParallelizer}
train_ratio::AbstractFloat
end

# Keyword constructor for `SimpleInductiveRegressor`.
#
# Wraps the atomic `model` and forwards the keyword settings to the positional
# constructor. The `scores` field (third positional argument) starts out as
# `nothing`.
function SimpleInductiveRegressor(
    model::Supervised;
    coverage::AbstractFloat=0.95,
    heuristic::Function=absolute_error,
    parallelizer::Union{Nothing,AbstractParallelizer}=nothing,
    train_ratio::AbstractFloat=0.5,
)
    # The struct has a `parallelizer` field, so it must be passed through. The
    # stale five-argument call that preceded this return dropped it and did not
    # match any positional constructor.
    return SimpleInductiveRegressor(
        model, coverage, nothing, heuristic, parallelizer, train_ratio
    )
end

@doc raw"""
MMI.fit(conf_model::SimpleInductiveRegressor, verbosity, X, y)
score(conf_model::SimpleInductiveRegressor, atomic::Supervised, fitresult, X, y=nothing)

For the [`SimpleInductiveRegressor`](@ref) nonconformity scores are computed as follows:

Expand All @@ -29,27 +33,12 @@ S_i^{\text{CAL}} = s(X_i, Y_i) = h(\hat\mu(X_i), Y_i), \ i \in \mathcal{D}_{\tex

A typical choice for the heuristic function is ``h(\hat\mu(X_i),Y_i)=|Y_i-\hat\mu(X_i)|`` where ``\hat\mu`` denotes the model fitted on training data ``\mathcal{D}_{\text{train}}``.
"""
function MMI.fit(conf_model::SimpleInductiveRegressor, verbosity, X, y)

    # Data Splitting:
    train, calibration = partition(eachindex(y), conf_model.train_ratio)
    Xtrain = selectrows(X, train)
    ytrain = y[train]
    Xcal = selectrows(X, calibration)
    ycal = y[calibration]

    # Training (the atomic model only sees the proper training set):
    fitresult, cache, report = MMI.fit(
        conf_model.model, verbosity, MMI.reformat(conf_model.model, Xtrain, ytrain)...
    )

    # Nonconformity Scores: apply the heuristic element-wise to the calibration
    # targets and the corresponding predictions.
    ŷ = reformat_mlj_prediction(
        MMI.predict(conf_model.model, fitresult, MMI.reformat(conf_model.model, Xcal)...)
    )
    conf_model.scores = @.(conf_model.heuristic(ycal, ŷ))

    return (fitresult, cache, report)
end

# Score method for the `SimpleInductiveRegressor`: predicts with the atomic
# model on `X` and applies `conf_model.heuristic` element-wise to `y` and the
# predictions. The same array is returned twice so that the result has the same
# two-element shape that callers destructure as `cal_scores, scores`.
function score(
    conf_model::SimpleInductiveRegressor, atomic::Supervised, fitresult, X, y=nothing
)
    ŷ = reformat_mlj_prediction(MMI.predict(atomic, fitresult, MMI.reformat(atomic, X)...))
    scores = @.(conf_model.heuristic(y, ŷ))
    return scores, scores
end

# Prediction
Expand Down Expand Up @@ -84,6 +73,7 @@ mutable struct ConformalQuantileRegressor{Model<:QuantileModel} <: ConformalInte
coverage::AbstractFloat
scores::Union{Nothing,AbstractArray}
heuristic::Function
parallelizer::Union{Nothing,AbstractParallelizer}
train_ratio::AbstractFloat
end

Expand All @@ -93,9 +83,12 @@ function ConformalQuantileRegressor(
heuristic::Function=function f(y, ŷ_lb, ŷ_ub)
return reduce((x, y) -> max.(x, y), [ŷ_lb - y, y - ŷ_ub])
end,
parallelizer::Union{Nothing,AbstractParallelizer}=nothing,
train_ratio::AbstractFloat=0.5,
)
return ConformalQuantileRegressor(model, coverage, nothing, heuristic, train_ratio)
return ConformalQuantileRegressor(
model, coverage, nothing, heuristic, parallelizer, train_ratio
)
end

@doc raw"""
Expand All @@ -114,13 +107,7 @@ A typical choice for the heuristic function is ``h(\hat\mu_{\alpha_{lo}}(X_i), \
function MMI.fit(conf_model::ConformalQuantileRegressor, verbosity, X, y)

# Data Splitting:
train, calibration = partition(eachindex(y), conf_model.train_ratio)
Xtrain = selectrows(X, train)
ytrain = y[train]
Xtrain, ytrain = MMI.reformat(conf_model.model, Xtrain, ytrain)
Xcal = selectrows(X, calibration)
ycal = y[calibration]
Xcal, ycal = MMI.reformat(conf_model.model, Xcal, ycal)
Xtrain, ytrain, Xcal, ycal = split_data(conf_model, X, y)

# Training:
fitresult, cache, report, y_pred = ([], [], [], [])
Expand Down
Loading