TuringLang
diff --git a/src/DynamicPPL.jl b/src/DynamicPPL.jl
Lines changed: 3 additions & 2 deletions
diff --git a/src/contexts/init.jl b/src/contexts/init.jl
Lines changed: 182 additions & 132 deletions
@@ -98,8 +98,9 @@ export AbstractVarInfo,
     # Samplers
     Sampler,
     # Initialisation strategies
-    Prior,
-    Uniform,
+    PriorInit,
+    UniformInit,
+    ParamsInit,
     # LogDensityFunction
     LogDensityFunction,
     # Contexts
 
@@ -1,45 +1,8 @@
-# Uniform random numbers with range 4 for robust initializations
+# Uniform random numbers in [-2, 2) (a range of width 4) for robust initializations
 # Reference: https://mc-stan.org/docs/2_19/reference-manual/initialization.html
 randrealuni(rng::Random.AbstractRNG) = 4 * rand(rng) - 2
 randrealuni(rng::Random.AbstractRNG, args...) = 4 .* rand(rng, args...) .- 2
 
-istransformable(dist) = link_transform(dist) !== identity
-
-#################################
-# Single-sample initialisations #
-#################################
-inittrans(rng, dist::UnivariateDistribution) = Bijectors.invlink(dist, randrealuni(rng))
-function inittrans(rng, dist::MultivariateDistribution)
-    # Get the length of the unconstrained vector
-    b = link_transform(dist)
-    d = Bijectors.output_length(b, length(dist))
-    return Bijectors.invlink(dist, randrealuni(rng, d))
-end
-function inittrans(rng, dist::MatrixDistribution)
-    # Get the size of the unconstrained vector
-    b = link_transform(dist)
-    sz = Bijectors.output_size(b, size(dist))
-    return Bijectors.invlink(dist, randrealuni(rng, sz...))
-end
-function inittrans(rng, dist::Distribution{CholeskyVariate})
-    # Get the size of the unconstrained vector
-    b = link_transform(dist)
-    sz = Bijectors.output_size(b, size(dist))
-    return Bijectors.invlink(dist, randrealuni(rng, sz...))
-end
-################################
-# Multi-sample initialisations #
-################################
-function inittrans(rng, dist::UnivariateDistribution, n::Int)
-    return Bijectors.invlink(dist, randrealuni(rng, n))
-end
-function inittrans(rng, dist::MultivariateDistribution, n::Int)
-    return Bijectors.invlink(dist, randrealuni(rng, size(dist)[1], n))
-end
-function inittrans(rng, dist::MatrixDistribution, n::Int)
-    return Bijectors.invlink(dist, [randrealuni(rng, size(dist)...) for _ in 1:n])
-end
-
 """
     AbstractInitStrategy
 
@@ -49,15 +12,29 @@ the random variables in a model (e.g., when creating a new VarInfo).
 abstract type AbstractInitStrategy end
 
 """
-    Prior()
+    init(rng::Random.AbstractRNG, vn::VarName, dist::Distribution, strategy::AbstractInitStrategy)
+
+Generate a new value for a random variable with the given distribution.
+
+!!! warning "Values must be unlinked"
+    The values returned by `init` are always in the untransformed space, i.e.,
+    they must be within the support of the original distribution. That means that,
+    for example, `init(rng, vn, dist, u::UniformInit)` will in general return values
+    that are outside the range `[u.lower, u.upper]`.
+"""
+function init end
 
-Obtain new values by sampling from the prior.
 """
-struct Prior <: AbstractInitStrategy end
+    PriorInit()
 
+Obtain new values by sampling from the prior distribution.
 """
-    Uniform()
-    Uniform(lower, upper)
+struct PriorInit <: AbstractInitStrategy end
+init(rng::Random.AbstractRNG, ::VarName, dist::Distribution, ::PriorInit) = rand(rng, dist)
+
+"""
+    UniformInit()
+    UniformInit(lower, upper)
 
 Obtain new values by first transforming the distribution of the random variable
 to unconstrained space, and then sampling a value uniformly between `lower` and
@@ -70,41 +47,65 @@ default initialisation strategy.
 
 [Stan reference manual page on initialization](https://mc-stan.org/docs/reference-manual/execution.html#initialization)
 """
-struct Uniform{T<:AbstractFloat} <: AbstractInitStrategy
+struct UniformInit{T<:AbstractFloat} <: AbstractInitStrategy
     lower::T
     upper::T
+    function UniformInit(lower::T, upper::T) where {T<:AbstractFloat}
+        lower > upper &&
+            throw(ArgumentError("`lower` must be less than or equal to `upper`"))
+        return new{T}(lower, upper)
+    end
+    UniformInit() = UniformInit(-2.0, 2.0)
+end
+function init(rng::Random.AbstractRNG, ::VarName, dist::Distribution, u::UniformInit)
+    b = Bijectors.bijector(dist)
+    sz = Bijectors.output_size(b, size(dist))
+    y = rand(rng, Uniform(u.lower, u.upper), sz)
+    b_inv = Bijectors.inverse(b)
+    return b_inv(y)
 end
-Uniform() = Uniform(-2, 2)
 
 """
-    Params(params::AbstractDict{VarName, Any}, default::AbstractInitStrategy)
-    Params(params::NamedTuple, default::AbstractInitStrategy)
+    ParamsInit(params::AbstractDict{<:VarName}, default::AbstractInitStrategy=PriorInit())
+    ParamsInit(params::NamedTuple, default::AbstractInitStrategy=PriorInit())
 
 Obtain new values by extracting them from the given dictionary or NamedTuple.
-These values are assumed to be provided in the space of the untransformed
-distribution.
-
 The parameter `default` specifies how new values are to be obtained if they
-cannot be found in `params`. The default for `default` is `Prior()`.
+cannot be found in `params`, or if they are specified as `missing`. The default
+for `default` is `PriorInit()`.
+
+!!! note
+    These values must be provided in the space of the untransformed distribution.
 """
-struct Params{P,S<:AbstractInitStrategy} <: AbstractInitStrategy
+struct ParamsInit{P,S<:AbstractInitStrategy} <: AbstractInitStrategy
     params::P
     default::S
-
-    function Params(
-        params::AbstractDict{VarName,Any}, default::AbstractInitStrategy=Prior()
-    )
+    function ParamsInit(params::AbstractDict{<:VarName}, default::AbstractInitStrategy)
         return new{typeof(params),typeof(default)}(params, default)
     end
-    function Params(params::NamedTuple, default::AbstractInitStrategy=Prior())
-        return Params(to_varname_dict(params), default)
+    ParamsInit(params::AbstractDict{<:VarName}) = ParamsInit(params, PriorInit())
+    function ParamsInit(params::NamedTuple, default::AbstractInitStrategy=PriorInit())
+        return ParamsInit(to_varname_dict(params), default)
+    end
+end
+function init(rng::Random.AbstractRNG, vn::VarName, dist::Distribution, p::ParamsInit)
+    return if hasvalue(p.params, vn)
+        x = getvalue(p.params, vn)
+        if x === missing
+            init(rng, vn, dist, p.default)
+        else
+            # TODO: Check that the type of x matches the dist?
+            x
+        end
+    else
+        init(rng, vn, dist, p.default)
     end
 end
 
 """
     InitContext(
             [rng::Random.AbstractRNG=Random.default_rng()],
-            [strategy::AbstractInitStrategy=Prior()],
+            [strategy::AbstractInitStrategy=PriorInit()],
     )
 
 A leaf context that indicates that new values for random variables are
@@ -115,95 +116,144 @@ VarInfo. Note that, if `leafcontext(model.context) isa InitContext`, then
 struct InitContext{R<:Random.AbstractRNG,S<:AbstractInitStrategy} <: AbstractContext
     rng::R
     strategy::S
-    function InitContext(rng::Random.AbstractRNG, strategy::AbstractInitStrategy=Prior())
+    function InitContext(
+        rng::Random.AbstractRNG, strategy::AbstractInitStrategy=PriorInit()
+    )
         return new{typeof(rng),typeof(strategy)}(rng, strategy)
     end
-    function InitContext(strategy::AbstractInitStrategy=Prior())
+    function InitContext(strategy::AbstractInitStrategy=PriorInit())
         return InitContext(Random.default_rng(), strategy)
     end
 end
 NodeTrait(::InitContext) = IsLeaf()
 
 function tilde_assume(
-    ctx::InitContext{<:Random.AbstractRNG,Prior},
-    dist::Distribution,
-    vn::VarName,
-    vi::AbstractVarInfo,
+    ctx::InitContext, dist::Distribution, vn::VarName, vi::AbstractVarInfo
 )
-    r = rand(ctx.rng, dist)
-    vi[vn] = r
-    # TODO: FIX
-    logjac = 0
-    vi = accumulate_assume!!(vi, r, -logjac, vn, dist)
-    println("sampled $r from $dist for $vn")
-    return r, vi
+    in_varinfo = haskey(vi, vn)
+    # `init()` always returns values in original space, i.e. possibly
+    # constrained
+    x = init(ctx.rng, vn, dist, ctx.strategy)
+    # There is a function `to_maybe_linked_internal_transform` that does this,
+    # but unfortunately it uses `istrans(vi, vn)` which fails if vn is not in
+    # vi, so we have to manually check. By default we will insert an unlinked
+    # value into the varinfo.
+    is_transformed = in_varinfo ? istrans(vi, vn) : false
+    f = if is_transformed
+        to_linked_internal_transform(vi, vn, dist)
+    else
+        to_internal_transform(vi, vn, dist)
+    end
+    # TODO(penelopeysm): We would really like to do:
+    #     y, logjac = with_logabsdet_jacobian(f, x)
+    # Unfortunately, `to_{linked_}internal_transform` returns a function that
+    # always converts x to a vector, i.e., if dist is univariate, f(x) will be
+    # a vector of length 1. It would be nice if we could unify these.
+    y = f(x)
+    logjac = logabsdetjac(is_transformed ? Bijectors.bijector(dist) : identity, x)
+    # Add the new value to the VarInfo. `push!!` errors if the value already
+    # exists, hence the need for `setindex!!`.
+    if in_varinfo
+        vi = setindex!!(vi, y, vn)
+    else
+        vi = push!!(vi, vn, y, dist)
+    end
+    # `accumulate_assume!!` wants untransformed values as the second argument.
+    vi = accumulate_assume!!(vi, x, -logjac, vn, dist)
+    # We always return the untransformed value here, as that will determine
+    # what the lhs of the tilde-statement is set to.
+    return x, vi
 end
 
-# TODO: Remove this thing.
-# function assume(
-#     rng::Random.AbstractRNG,
-#     init_strategy::AbstractInitStrategy,
-#     dist::Distribution,
-#     vn::VarName,
-#     vi::AbstractVarInfo,
+# """
+#     set_initial_values(varinfo::AbstractVarInfo, initial_params::AbstractVector)
+#     set_initial_values(varinfo::AbstractVarInfo, initial_params::NamedTuple)
+#
+# Take the values inside `initial_params`, replace the corresponding values in
+# the given VarInfo object, and return a new VarInfo object with the updated values.
+#
+# This differs from `DynamicPPL.unflatten` in two ways:
+#
+# 1. It works with `NamedTuple` arguments.
+# 2. For the `AbstractVector` method, if any of the elements are missing, it will not
+# overwrite the original value in the VarInfo (it will just use the original
+# value instead).
+# """
+# function set_initial_values(varinfo::AbstractVarInfo, initial_params::AbstractVector)
+#     throw(
+#         ArgumentError(
+#             "`initial_params` must be a vector of type `Union{Real,Missing}`. " *
+#             "If `initial_params` is a vector of vectors, please flatten it (e.g. using `vcat`) first.",
+#         ),
+#     )
+# end
+#
+# function set_initial_values(
+#     varinfo::AbstractVarInfo, initial_params::AbstractVector{<:Union{Real,Missing}}
 # )
-#     if haskey(vi, vn)
-#         # Always overwrite the parameters with new ones for `SampleFromUniform`.
-#         if sampler isa SampleFromUniform || is_flagged(vi, vn, "del")
-#             # TODO(mhauru) Is it important to unset the flag here? The `true` allows us
-#             # to ignore the fact that for VarNamedVector this does nothing, but I'm unsure
-#             # if that's okay.
-#             unset_flag!(vi, vn, "del", true)
-#             r = init(rng, dist, sampler)
-#             f = to_maybe_linked_internal_transform(vi, vn, dist)
-#             # TODO(mhauru) This should probably be call a function called setindex_internal!
-#             vi = BangBang.setindex!!(vi, f(r), vn)
-#             setorder!(vi, vn, get_num_produce(vi))
-#         else
-#             # Otherwise we just extract it.
-#             r = vi[vn, dist]
-#         end
-#     else
-#         r = init(rng, dist, sampler)
-#         if istrans(vi)
-#             f = to_linked_internal_transform(vi, vn, dist)
-#             vi = push!!(vi, vn, f(r), dist)
-#             # By default `push!!` sets the transformed flag to `false`.
-#             vi = settrans!!(vi, true, vn)
-#         else
-#             vi = push!!(vi, vn, r, dist)
+#     flattened_param_vals = varinfo[:]
+#     length(flattened_param_vals) == length(initial_params) || throw(
+#         DimensionMismatch(
+#             "Provided initial value size ($(length(initial_params))) doesn't match " *
+#             "the model size ($(length(flattened_param_vals))).",
+#         ),
+#     )
+#
+#     # Update values that are provided.
+#     for i in eachindex(initial_params)
+#         x = initial_params[i]
+#         if x !== missing
+#             flattened_param_vals[i] = x
 #         end
 #     end
 #
-#     # HACK: The above code might involve an `invlink` somewhere, etc. so we need to correct.
-#     logjac = logabsdetjac(istrans(vi, vn) ? link_transform(dist) : identity, r)
-#     vi = accumulate_assume!!(vi, r, -logjac, vn, dist)
-#     return r, vi
+#     # Update in `varinfo`.
+#     new_varinfo = unflatten(varinfo, flattened_param_vals)
+#     return new_varinfo
 # end
-
-# function assume(
-#     rng::Random.AbstractRNG,
-#     sampler::Union{SampleFromPrior,SampleFromUniform},
-#     dist::Distribution,
-#     vn::VarName,
-#     vi::SimpleOrThreadSafeSimple,
-# )
-#     value = init(rng, dist, sampler)
-#     # Transform if we're working in unconstrained space.
-#     f = to_maybe_linked_internal_transform(vi, vn, dist)
-#     value_raw, logjac = with_logabsdet_jacobian(f, value)
-#     vi = BangBang.push!!(vi, vn, value_raw, dist)
-#     vi = accumulate_assume!!(vi, value, -logjac, vn, dist)
-#     return value, vi
-# end
-
-# Initializations.
-# init(rng, dist, ::SampleFromPrior) = rand(rng, dist)
-# function init(rng, dist, ::SampleFromUniform)
-#     return istransformable(dist) ? inittrans(rng, dist) : rand(rng, dist)
+#
+# function set_initial_values(varinfo::AbstractVarInfo, initial_params::NamedTuple)
+#     varinfo = deepcopy(varinfo)
+#     vars_in_varinfo = keys(varinfo)
+#     for v in keys(initial_params)
+#         vn = VarName{v}()
+#         if !(vn in vars_in_varinfo)
+#             for vv in vars_in_varinfo
+#                 if subsumes(vn, vv)
+#                     throw(
+#                         ArgumentError(
+#                             "The current model contains sub-variables of $v, such as ($vv). " *
+#                             "Using NamedTuple for initial_params is not supported in such a case. " *
+#                             "Please use AbstractVector for initial_params instead of NamedTuple.",
+#                         ),
+#                     )
+#                 end
+#             end
+#             throw(ArgumentError("Variable $v not found in the model."))
+#         end
+#     end
+#     initial_params = NamedTuple(k => v for (k, v) in pairs(initial_params) if v !== missing)
+#     return update_values!!(
+#         varinfo, initial_params, map(k -> VarName{k}(), keys(initial_params))
+#     )
 # end
 #
-# init(rng, dist, ::SampleFromPrior, n::Int) = rand(rng, dist, n)
-# function init(rng, dist, ::SampleFromUniform, n::Int)
-#     return istransformable(dist) ? inittrans(rng, dist, n) : rand(rng, dist, n)
+# function initialize_parameters!!(vi::AbstractVarInfo, initial_params, model::Model)
+#     @debug "Using passed-in initial variable values" initial_params
+#
+#     # `invlink` the varinfo if needed.
+#     linked = islinked(vi)
+#     if linked
+#         vi = invlink!!(vi, model)
+#     end
+#
+#     # Set the values in `vi`.
+#     vi = set_initial_values(vi, initial_params)
+#
+#     # Re-`link` if needed.
+#     if linked
+#         vi = link!!(vi, model)
+#     end
+#
+#     return vi
 # end