New Model Supertype
Kirstine.jl comes with support for nonlinear regression models. The theory of optimal design is, of course, also applicable to other regression settings. This vignette explains how to extend Kirstine.jl so that we can also find optimal designs for logistic regression.
The statistical model is
\[\begin{align*} \Unit_{\IndexUnit} \mid π_{\IndexUnit} &\simiid \BernoulliDist(π_{\IndexUnit}) \\ \Logit(π_{\IndexUnit}) &= ν(\Covariate_{\IndexUnit}, \Parameter). \end{align*}\]
Since we allow the predictor to be a nonlinear function $ν : \CovariateSet × \ParameterSet → \Reals$, our setup is more general than the usual logistic GLM, where the predictor is linear.
Now, we need to find the Fisher information matrix of the model. With the log-likelihood
\[\LogLikelihood(\Unit,\Covariate,\Parameter) = \Unit \log(\Expit(ν(\Covariate, \Parameter))) + (1 - \Unit) \log(1 - \Expit(ν(\Covariate, \Parameter)))\]
and some calculations we arrive at
\[- \Expectation[\Hessian\LogLikelihood\mid\Parameter] = \Expit(ν) (1 - \Expit(ν)) \TotalDiff ν' \TotalDiff ν.\]
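Here is a sketch of those calculations. Writing $π = \Expit(ν(\Covariate, \Parameter))$ and using $\Expit' = \Expit (1 - \Expit)$, the score and the Hessian of the log-likelihood are
\[\begin{align*} \TotalDiff\LogLikelihood &= (\Unit - π) \TotalDiff ν \\ \Hessian\LogLikelihood &= (\Unit - π) \Hessian ν - π (1 - π) \TotalDiff ν' \TotalDiff ν, \end{align*}\]
and since $\Expectation[\Unit \mid \Parameter] = π$, the first term of the Hessian vanishes in expectation.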
Implementation
First we introduce a new subtype of `Kirstine.Model`, and a corresponding `ModelWorkspace` to hold the pre-allocated Jacobian matrix of the predictor. Then we implement a method for `average_fishermatrix!` that specializes on logistic regression models and returns the upper triangle of the averaged Fisher matrices. If we had any expressions that depend on $\Covariate$, but not on $\Parameter$, we could store them in additional fields of the `ModelWorkspace` and compute them in `update_model_workspace!`.
using Kirstine, Plots, Random
import LinearAlgebra: BLAS
abstract type LogisticRegression <: Kirstine.Model end
# We can define this on the supertype, since a unit is always a single binary observation.
Kirstine.unit_length(m::LogisticRegression) = 1
struct LRWorkspace <: Kirstine.ModelWorkspace
jm_predictor::Matrix{Float64}
end
# We can ignore the number of design points `K` here, since we only need a single
# Jacobian row that is overwritten for every design point.
function Kirstine.allocate_model_workspace(
K::Integer,
m::LogisticRegression,
pk::PriorSample,
)
return LRWorkspace(zeros(1, dimension(parameters(pk)[1])))
end
# Nothing to do here, since no intermediate expressions depend on the covariate alone.
function Kirstine.update_model_workspace!(
mw::LRWorkspace,
m::LogisticRegression,
c::AbstractVector{<:Kirstine.Covariate},
)
return mw
end
expit(x) = 1 / (1 + exp(-x))
function Kirstine.average_fishermatrix!(
afm::AbstractMatrix{<:Real},
mw::LRWorkspace,
w::AbstractVector,
m::LogisticRegression,
c::AbstractVector{<:Kirstine.Covariate},
p::Kirstine.Parameter,
)
fill!(afm, 0.0)
for k in 1:length(w)
# We need to implement the next two functions for concrete subtypes.
predictor_jacobianmatrix!(mw.jm_predictor, m, c[k], p)
prob = expit(nonlinear_predictor(m, c[k], p))
p1mp = prob * (1 - prob)
BLAS.syrk!('U', 'T', p1mp * w[k], mw.jm_predictor, 1.0, afm)
end
return afm
end
The call to `BLAS.syrk!` is a more efficient way to compute

afm .+= p1mp * w[k] * mw.jm_predictor' * mw.jm_predictor

in-place. Since the result is symmetric, only the upper triangle of `afm` is filled.
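To see what `syrk!` does, here is a minimal standalone check; the matrix `A` and the scalars are arbitrary example values, not part of the vignette's pipeline:

```julia
using LinearAlgebra: BLAS, Symmetric

A = [1.0 2.0 3.0]                    # a 1×3 matrix, like `jm_predictor`
C = zeros(3, 3)
# C ← 0.5 * A'A + 1.0 * C, writing only the upper triangle of C
BLAS.syrk!('U', 'T', 0.5, A, 1.0, C)
Symmetric(C) ≈ 0.5 * A' * A          # true; the strict lower triangle of C stays zero
```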
Examples
Linear Predictor
We start with a linear predictor and a $c$-dimensional covariate
\[ν(\Covariate, \Parameter) = β_0 + \sum_{j=1}^{c} β_j x_j\]
where $\Parameter = (β_0, β_1, …, β_c)$.
struct LinLogReg <: LogisticRegression
dim_covariate::Int64
end
mutable struct LLRCovariate <: Kirstine.Covariate
x::Vector{Float64}
end
Kirstine.allocate_covariate(m::LinLogReg) = LLRCovariate(zeros(m.dim_covariate))
struct LLRParameter <: Kirstine.Parameter
beta_0::Float64
beta_rest::Vector{Float64}
end
Kirstine.dimension(p::LLRParameter) = length(p.beta_rest) + 1
struct CopyVector <: Kirstine.CovariateParameterization end
function Kirstine.map_to_covariate!(c::LLRCovariate, dp, m::LinLogReg, cp::CopyVector)
if length(dp) != length(c.x)
error("dimension of covariate and design point don't match")
end
c.x .= dp
return c
end
function nonlinear_predictor(m::LinLogReg, c::LLRCovariate, p::LLRParameter)
dim_c = length(c.x)
if length(p.beta_rest) != dim_c
error("dimensions of covariate and parameter don't match")
end
np = p.beta_0
for j in 1:dim_c
np += p.beta_rest[j] * c.x[j]
end
return np
end
function predictor_jacobianmatrix!(jm, m::LinLogReg, c::LLRCovariate, p::LLRParameter)
dim_c = length(c.x)
if length(p.beta_rest) != dim_c
error("dimensions of covariate and parameter don't match")
end
jm[1, 1] = 1
for j in 1:dim_c
jm[1, j + 1] = c.x[j]
end
return jm
end
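If we want to double-check the hand-coded derivatives, we can compare them against a central finite difference. This is a hypothetical sanity check with arbitrary values, not part of the vignette:

```julia
m_chk = LinLogReg(2)
c_chk = LLRCovariate([0.3, 0.7])
jm_chk = zeros(1, 3)
predictor_jacobianmatrix!(jm_chk, m_chk, c_chk, LLRParameter(-1.0, [2.0, -0.5]))
# central difference in β_1; jm_chk[1, 2] holds ∂ν/∂β_1
h = 1e-6
np_hi = nonlinear_predictor(m_chk, c_chk, LLRParameter(-1.0, [2.0 + h, -0.5]))
np_lo = nonlinear_predictor(m_chk, c_chk, LLRParameter(-1.0, [2.0 - h, -0.5]))
isapprox(jm_chk[1, 2], (np_hi - np_lo) / (2h); atol = 1e-8)  # true
```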
1-Dimensional Covariate
First, let's look at a locally optimal design for a 1-dimensional covariate, taken from Section 6.2, p. 154 of [FL13], where
\[\begin{align*} \DesignRegion &= \CovariateSet = [0, 4] \\ \Parameter &= (-3, 1.81) \end{align*}\]
dp0 = DesignProblem(
criterion = DCriterion(),
region = DesignInterval(:x1 => (0, 4)),
covariate_parameterization = CopyVector(),
prior_knowledge = PriorSample([LLRParameter(-3, [1.81])]),
model = LinLogReg(1),
)
Random.seed!(12345)
str0 = DirectMaximization(
optimizer = Pso(swarmsize = 100, iterations = 50),
prototype = random_design(region(dp0), 4),
)
Random.seed!(12345)
s0, r0 = solve(dp0, str0; maxdist = 1e-3)
s0
DesignMeasure(
[0.8047475621673699] => 0.49999389060536853,
[2.5101732141323296] => 0.5000061093946315,
)
gd0 = plot_gateauxderivative(s0, dp0)
er0 = plot_expected_function(
    (x, c, p) -> expit(p.beta_0 + p.beta_rest[1] * x),      # curve to plot, as a function of x
    c -> [c.x[1]],                                          # x coordinates of the design points
    (c, p) -> [expit(p.beta_0 + p.beta_rest[1] * c.x[1])],  # y coordinates of the design points
    range(lowerbound(region(dp0))[1], upperbound(region(dp0))[1]; length = 101),
    s0,
    model(dp0),
    covariate_parameterization(dp0),
    prior_knowledge(dp0);
    xguide = "x",
)
2-Dimensional Covariate
Next, we are looking at a design with a 2-dimensional covariate that is locally optimal for estimating the odds ratios
\[\Transformation(\Parameter) = (\exp(β_1), \exp(β_2))'\]
when
\[\begin{align*} \DesignRegion &= \CovariateSet = [0, 1]^2 \\ \Parameter &= (-4, 6, -3) \end{align*}\]
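The `DeltaMethod` needs the Jacobian matrix of $\Transformation$ with respect to $\Parameter = (β_0, β_1, β_2)$, which here is
\[\TotalDiff\Transformation(\Parameter) = \begin{pmatrix} 0 & \exp(β_1) & 0 \\ 0 & 0 & \exp(β_2) \end{pmatrix},\]
and which we pass to it row by row below.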
dp1 = DesignProblem(
criterion = DCriterion(),
region = DesignInterval(:x1 => (0, 1), :x2 => (0, 1)),
covariate_parameterization = CopyVector(),
prior_knowledge = PriorSample([LLRParameter(-4, [6, -3])]),
model = LinLogReg(2),
transformation = DeltaMethod(p -> [0 exp(p.beta_rest[1]) 0; 0 0 exp(p.beta_rest[2])]),
)
Random.seed!(12345)
str1 = DirectMaximization(
optimizer = Pso(swarmsize = 100, iterations = 50),
prototype = random_design(region(dp1), 4),
)
Random.seed!(12345)
s1, r1 = solve(dp1, str1; maxdist = 5e-2, maxweight = 1e-3)
gd1 = plot_gateauxderivative(s1, dp1)
function erplot1(d, dp)
    # response profile in x1, with x2 fixed at the design point's value
    f1(x, c, p) = expit(p.beta_0 + p.beta_rest[1] * x + p.beta_rest[2] * c.x[2])
    # response profile in x2, with x1 fixed at the design point's value
    f2(x, c, p) = expit(p.beta_0 + p.beta_rest[1] * c.x[1] + p.beta_rest[2] * x)
g1(c) = [c.x[1]]
g2(c) = [c.x[2]]
h(c, p) = [expit(p.beta_0 + p.beta_rest[1] * c.x[1] + p.beta_rest[2] * c.x[2])]
xrange = range(lowerbound(region(dp))[1], upperbound(region(dp))[1]; length = 101)
cp = covariate_parameterization(dp)
pk = prior_knowledge(dp)
m = model(dp)
p1 = plot_expected_function(f1, g1, h, xrange, d, m, cp, pk; xguide = "x1")
p2 = plot_expected_function(f2, g2, h, xrange, d, m, cp, pk; xguide = "x2")
return plot(p1, p2)
end
er1 = erplot1(s1, dp1)
Nonlinear Predictor
A slightly silly example:
\[ν(\Covariate, \Parameter) = a \sin(2π b (t - c))\]
where $\Covariate = t$ and $\Parameter = (a, b, c)$ with $a > 0$, $b > 0$ and $0 ≤ c < 1$. We are interested in estimating all of $\Parameter$.
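The Jacobian matrix of the predictor, which we implement next, collects the partial derivatives with respect to $a$, $b$ and $c$:
\[\TotalDiff ν = ( \sin(2πb(t - c)), \; 2πa(t - c)\cos(2πb(t - c)), \; -2πab\cos(2πb(t - c)) ).\]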
struct SinLogReg <: LogisticRegression end
mutable struct Time <: Kirstine.Covariate
t::Float64
end
Kirstine.allocate_covariate(m::SinLogReg) = Time(0)
@simple_parameter SLR a b c
function nonlinear_predictor(m::SinLogReg, c::Time, p::SLRParameter)
return p.a * sin(2 * pi * p.b * (c.t - p.c))
end
function predictor_jacobianmatrix!(jm, m::SinLogReg, c::Time, p::SLRParameter)
sin_term = sin(2 * pi * p.b * (c.t - p.c))
cos_term = cos(2 * pi * p.b * (c.t - p.c))
jm[1, 1] = sin_term
jm[1, 2] = p.a * cos_term * 2 * pi * (c.t - p.c)
jm[1, 3] = -p.a * cos_term * 2 * pi * p.b
return jm
end
function erplot2(d, dp)
f(x, c, p) = expit(p.a * sin(2 * pi * p.b * (x - p.c)))
g(c) = [c.t]
h(c, p) = [expit(p.a * sin(2 * pi * p.b * (c.t - p.c)))]
xrange = range(lowerbound(region(dp))[1], upperbound(region(dp))[1]; length = 101)
cp = covariate_parameterization(dp)
pk = prior_knowledge(dp)
m = model(dp)
return plot_expected_function(f, g, h, xrange, d, m, cp, pk; xguide = "time")
end
dp2 = DesignProblem(
criterion = DCriterion(),
region = DesignInterval(:t => (0, 1)),
covariate_parameterization = JustCopy(:t),
prior_knowledge = PriorSample([SLRParameter(a = 1, b = 1.5, c = 0.125)]),
model = SinLogReg(),
)
Random.seed!(12345)
str2 = DirectMaximization(
optimizer = Pso(swarmsize = 100, iterations = 50),
prototype = random_design(region(dp2), 4),
)
Random.seed!(12345)
s2, r2 = solve(dp2, str2; maxdist = 5e-2, maxweight = 1e-3)
gd2 = plot_gateauxderivative(s2, dp2)
er2 = erplot2(s2, dp2)
- [FL13] Valerii V. Fedorov and Sergei L. Leonov (2013). Optimal Design for Nonlinear Response Models. CRC Press. doi:10.1201/b15054