Commit b2b5873

Use init!! for initialisation
1 parent f856389 commit b2b5873

3 files changed: +45 -174 lines changed

docs/src/api.md

Lines changed: 6 additions & 1 deletion
@@ -456,6 +456,11 @@ AbstractPPL.evaluate!!
 
 This method mutates the `varinfo` used for execution.
 By default, it does not perform any actual sampling: it only evaluates the model using the values of the variables that are already in the `varinfo`.
+To perform sampling, you can either wrap `model.context` in a `SamplingContext`, or use this convenience method:
+
+```@docs
+DynamicPPL.evaluate_and_sample!!
+```
 
 The behaviour of a model execution can be changed with evaluation contexts, which are a field of the model.
 Contexts are subtypes of `AbstractPPL.AbstractContext`.
@@ -514,7 +519,7 @@ The default implementation of [`Sampler`](@ref) uses the following unexported fu
 ```@docs
 DynamicPPL.initialstep
 DynamicPPL.loadstate
-DynamicPPL.initialsampler
+DynamicPPL.init_strategy
 ```
 
 Finally, to specify which varinfo type a [`Sampler`](@ref) should use for a given [`Model`](@ref), this is specified by [`DynamicPPL.default_varinfo`](@ref) and can thus be overloaded for each `model`-`sampler` combination. This can be useful in cases where one has explicit knowledge that one type of varinfo will be more performant for the given `model` and `sampler`.
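
Usage note: a minimal sketch of the convenience method added to the docs above, assuming `DynamicPPL.evaluate_and_sample!!` takes `(rng, model, varinfo)` positionally and returns a `(retval, varinfo)` tuple like `AbstractPPL.evaluate!!`; the `coinflip` model is a hypothetical example.

```julia
using DynamicPPL, Distributions, Random

@model function coinflip()
    p ~ Beta(2, 2)
    return p
end

model = coinflip()
vi = VarInfo()  # start from an empty varinfo; sampling fills it in

# Assumed signature: (rng, model, varinfo) -> (model return value, updated varinfo),
# mirroring `AbstractPPL.evaluate!!`.
retval, vi = DynamicPPL.evaluate_and_sample!!(Random.default_rng(), model, vi)
```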

src/sampler.jl

Lines changed: 28 additions & 120 deletions
@@ -68,6 +68,8 @@ end
 
 Return a default varinfo object for the given `model` and `sampler`.
 
+The default method for this returns an empty NTVarInfo (i.e. 'typed varinfo').
+
 # Arguments
 - `rng::Random.AbstractRNG`: Random number generator.
 - `model::Model`: Model for which we want to create a varinfo object.
@@ -76,9 +78,10 @@ Return a default varinfo object for the given `model` and `sampler`.
 # Returns
 - `AbstractVarInfo`: Default varinfo object for the given `model` and `sampler`.
 """
-function default_varinfo(rng::Random.AbstractRNG, model::Model, sampler::AbstractSampler)
-    init_sampler = initialsampler(sampler)
-    return typed_varinfo(rng, model, init_sampler)
+function default_varinfo(::Random.AbstractRNG, ::Model, ::AbstractSampler)
+    # Note that variable values are unconditionally initialized later, so no
+    # point putting them in now.
+    return typed_varinfo(VarInfo())
 end
 
 function AbstractMCMC.sample(
@@ -96,24 +99,32 @@ function AbstractMCMC.sample(
     )
 end
 
-# initial step: general interface for resuming and
+"""
+    init_strategy(sampler)
+
+Define the initialisation strategy used for generating initial values when
+sampling with `sampler`. Defaults to `PriorInit()`, but can be overridden.
+"""
+init_strategy(::Sampler) = PriorInit()
+
 function AbstractMCMC.step(
-    rng::Random.AbstractRNG, model::Model, spl::Sampler; initial_params=nothing, kwargs...
+    rng::Random.AbstractRNG,
+    model::Model,
+    spl::Sampler;
+    initial_params::AbstractInitStrategy=init_strategy(spl),
+    kwargs...,
 )
-    # Sample initial values.
+    # Generate the default varinfo (usually this just makes an empty VarInfo
+    # with NamedTuple of Metadata).
     vi = default_varinfo(rng, model, spl)
 
-    # Update the parameters if provided.
-    if initial_params !== nothing
-        vi = initialize_parameters!!(vi, initial_params, model)
-
-        # Update joint log probability.
-        # This is a quick fix for https://github.com/TuringLang/Turing.jl/issues/1588
-        # and https://github.com/TuringLang/Turing.jl/issues/1563
-        # to avoid that existing variables are resampled
-        vi = last(evaluate!!(model, vi))
-    end
+    # Fill it with initial parameters. Note that, if `ParamsInit` is used, the
+    # parameters provided must be in unlinked space (when inserted into the
+    # varinfo, they will be adjusted to match the linking status of the
+    # varinfo).
+    _, vi = init!!(rng, model, vi, initial_params)
 
+    # Call the actual function that does the first step.
     return initialstep(rng, model, spl, vi; initial_params, kwargs...)
 end
 
@@ -131,110 +142,7 @@ loadstate(data) = data
 
 Default type of the chain of posterior samples from `sampler`.
 """
-default_chain_type(sampler::Sampler) = Any
-
-"""
-    initialsampler(sampler::Sampler)
-
-Return the sampler that is used for generating the initial parameters when sampling with
-`sampler`.
-
-By default, it returns an instance of [`SampleFromPrior`](@ref).
-"""
-initialsampler(spl::Sampler) = SampleFromPrior()
-
-"""
-    set_initial_values(varinfo::AbstractVarInfo, initial_params::AbstractVector)
-    set_initial_values(varinfo::AbstractVarInfo, initial_params::NamedTuple)
-
-Take the values inside `initial_params`, replace the corresponding values in
-the given VarInfo object, and return a new VarInfo object with the updated values.
-
-This differs from `DynamicPPL.unflatten` in two ways:
-
-1. It works with `NamedTuple` arguments.
-2. For the `AbstractVector` method, if any of the elements are missing, it will not
-   overwrite the original value in the VarInfo (it will just use the original
-   value instead).
-"""
-function set_initial_values(varinfo::AbstractVarInfo, initial_params::AbstractVector)
-    throw(
-        ArgumentError(
-            "`initial_params` must be a vector of type `Union{Real,Missing}`. " *
-            "If `initial_params` is a vector of vectors, please flatten it (e.g. using `vcat`) first.",
-        ),
-    )
-end
-
-function set_initial_values(
-    varinfo::AbstractVarInfo, initial_params::AbstractVector{<:Union{Real,Missing}}
-)
-    flattened_param_vals = varinfo[:]
-    length(flattened_param_vals) == length(initial_params) || throw(
-        DimensionMismatch(
-            "Provided initial value size ($(length(initial_params))) doesn't match " *
-            "the model size ($(length(flattened_param_vals))).",
-        ),
-    )
-
-    # Update values that are provided.
-    for i in eachindex(initial_params)
-        x = initial_params[i]
-        if x !== missing
-            flattened_param_vals[i] = x
-        end
-    end
-
-    # Update in `varinfo`.
-    new_varinfo = unflatten(varinfo, flattened_param_vals)
-    return new_varinfo
-end
-
-function set_initial_values(varinfo::AbstractVarInfo, initial_params::NamedTuple)
-    varinfo = deepcopy(varinfo)
-    vars_in_varinfo = keys(varinfo)
-    for v in keys(initial_params)
-        vn = VarName{v}()
-        if !(vn in vars_in_varinfo)
-            for vv in vars_in_varinfo
-                if subsumes(vn, vv)
-                    throw(
-                        ArgumentError(
-                            "The current model contains sub-variables of $v, such as ($vv). " *
-                            "Using NamedTuple for initial_params is not supported in such a case. " *
-                            "Please use AbstractVector for initial_params instead of NamedTuple.",
-                        ),
-                    )
-                end
-            end
-            throw(ArgumentError("Variable $v not found in the model."))
-        end
-    end
-    initial_params = NamedTuple(k => v for (k, v) in pairs(initial_params) if v !== missing)
-    return update_values!!(
-        varinfo, initial_params, map(k -> VarName{k}(), keys(initial_params))
-    )
-end
-
-function initialize_parameters!!(vi::AbstractVarInfo, initial_params, model::Model)
-    @debug "Using passed-in initial variable values" initial_params
-
-    # `link` the varinfo if needed.
-    linked = islinked(vi)
-    if linked
-        vi = invlink!!(vi, model)
-    end
-
-    # Set the values in `vi`.
-    vi = set_initial_values(vi, initial_params)
-
-    # `invlink` if needed.
-    if linked
-        vi = link!!(vi, model)
-    end
-
-    return vi
-end
+default_chain_type(::Sampler) = Any
 
 """
     initialstep(rng, model, sampler, varinfo; kwargs...)
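
Usage note: the removed `initialsampler` hook is replaced by `init_strategy`, which a downstream sampler can override to change how starting values are generated. A minimal sketch, assuming `Sampler{T}` is parametrised by its algorithm type (as suggested by `Sampler(alg)` in the tests) and that `PriorInit`/`ParamsInit` are the strategy types introduced in this changeset; the algorithm type and the variable names `s`/`m` are placeholders.

```julia
using DynamicPPL

# Hypothetical algorithm type, used only for illustration.
struct FixedStartAlg end

# By default, a Sampler's starting point is drawn from the prior (PriorInit()).
# Overriding init_strategy makes every chain for this algorithm start from
# fixed values instead (here for a model with variables s and m).
function DynamicPPL.init_strategy(::DynamicPPL.Sampler{FixedStartAlg})
    return DynamicPPL.ParamsInit((; s=1.0, m=0.0))
end
```

The new `AbstractMCMC.step` picks this strategy up through its `initial_params=init_strategy(spl)` default and passes it to `init!!`, so no keyword is needed at the call site.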

test/sampler.jl

Lines changed: 11 additions & 53 deletions
@@ -82,7 +82,9 @@
 sampler = Sampler(alg)
 lptrue = logpdf(Binomial(25, 0.2), 10)
 let inits = (; p=0.2)
-    chain = sample(model, sampler, 1; initial_params=inits, progress=false)
+    chain = sample(
+        model, sampler, 1; initial_params=ParamsInit(inits), progress=false
+    )
     @test chain[1].metadata.p.vals == [0.2]
     @test getlogjoint(chain[1]) == lptrue
 
@@ -110,7 +112,9 @@
 model = twovars()
 lptrue = logpdf(InverseGamma(2, 3), 4) + logpdf(Normal(0, 2), -1)
 for inits in ([4, -1], (; s=4, m=-1))
-    chain = sample(model, sampler, 1; initial_params=inits, progress=false)
+    chain = sample(
+        model, sampler, 1; initial_params=ParamsInit(inits), progress=false
+    )
     @test chain[1].metadata.s.vals == [4]
     @test chain[1].metadata.m.vals == [-1]
     @test getlogjoint(chain[1]) == lptrue
@@ -122,7 +126,7 @@
     MCMCThreads(),
     1,
     10;
-    initial_params=fill(inits, 10),
+    initial_params=fill(ParamsInit(inits), 10),
     progress=false,
 )
 for c in chains
@@ -133,8 +137,10 @@
 end
 
 # set only m = -1
-for inits in ([missing, -1], (; s=missing, m=-1), (; m=-1))
-    chain = sample(model, sampler, 1; initial_params=inits, progress=false)
+for inits in ((; s=missing, m=-1), (; m=-1))
+    chain = sample(
+        model, sampler, 1; initial_params=ParamsInit(inits), progress=false
+    )
     @test !ismissing(chain[1].metadata.s.vals[1])
     @test chain[1].metadata.m.vals == [-1]
 
@@ -153,54 +159,6 @@
     @test c[1].metadata.m.vals == [-1]
 end
 end
-
-# specify `initial_params=nothing`
-Random.seed!(1234)
-chain1 = sample(model, sampler, 1; progress=false)
-Random.seed!(1234)
-chain2 = sample(model, sampler, 1; initial_params=nothing, progress=false)
-@test_throws DimensionMismatch sample(
-    model, sampler, 1; progress=false, initial_params=zeros(10)
-)
-@test chain1[1].metadata.m.vals == chain2[1].metadata.m.vals
-@test chain1[1].metadata.s.vals == chain2[1].metadata.s.vals
-
-# parallel sampling
-Random.seed!(1234)
-chains1 = sample(model, sampler, MCMCThreads(), 1, 10; progress=false)
-Random.seed!(1234)
-chains2 = sample(
-    model, sampler, MCMCThreads(), 1, 10; initial_params=nothing, progress=false
-)
-for (c1, c2) in zip(chains1, chains2)
-    @test c1[1].metadata.m.vals == c2[1].metadata.m.vals
-    @test c1[1].metadata.s.vals == c2[1].metadata.s.vals
-end
-end
-
-@testset "error handling" begin
-    # https://github.com/TuringLang/Turing.jl/issues/2452
-    @model function constrained_uniform(n)
-        Z ~ Uniform(10, 20)
-        X = Vector{Float64}(undef, n)
-        for i in 1:n
-            X[i] ~ Uniform(0, Z)
-        end
-    end
-
-    n = 2
-    initial_z = 15
-    initial_x = [0.2, 0.5]
-    model = constrained_uniform(n)
-    vi = VarInfo(model)
-
-    @test_throws ArgumentError DynamicPPL.initialize_parameters!!(
-        vi, [initial_z, initial_x], model
-    )
-
-    @test_throws ArgumentError DynamicPPL.initialize_parameters!!(
-        vi, (X=initial_x, Z=initial_z), model
-    )
 end
 end
 end
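
Usage note: the updated tests wrap user-supplied starting values in `ParamsInit`. Below is a self-contained sketch of the path those tests now exercise, calling `init!!` directly as the new `AbstractMCMC.step` does; the `(retval, varinfo)` return shape of `init!!` and the `typed_varinfo(VarInfo())` construction are taken from the `src/sampler.jl` hunks above, and `ParamsInit` is assumed to accept a `NamedTuple` as in the tests.

```julia
using DynamicPPL, Distributions, Random

@model function twovars()
    s ~ InverseGamma(2, 3)
    m ~ Normal(0, 2)
end

model = twovars()
vi = DynamicPPL.typed_varinfo(VarInfo())  # as in the new default_varinfo
rng = Random.default_rng()

# Supply m only; s is absent, so it is filled in by sampling, matching the
# `(; m=-1)` cases checked in the tests above.
_, vi = DynamicPPL.init!!(rng, model, vi, DynamicPPL.ParamsInit((; m=-1.0)))
@assert vi[@varname(m)] == -1.0
```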
