= 20
lambda = 5
n_depth = (n_depth * lambda + 4 * sqrt(n_depth * lambda))
grid_size = 0:grid_size
age_seq
# theoretical distributions
= function (x) x / sum(x)
normalize
= dpois(age_seq, lambda = 4 * lambda)
four_th = dpois(age_seq, lambda = 5 * lambda)
five_th
# five from four
= {
five_ff = sapply(age_seq, function (x) {
tmp dpois(age_seq - x, lambda = lambda) * dpois(x, lambda = 4 * lambda)
})= rowSums(tmp)
tmp = normalize(tmp)
tmp
tmp
}rm(tmp)
# plot
= par(no.readonly = TRUE)
opar par(mfrow = c(3, 1))
plot(age_seq, five_ff, type = 'h')
plot(age_seq, five_th, type = 'h')
plot(age_seq, four_th, type = 'h')
Here we will explore the poisson process as a prior to model age-depth relations. The goal is to be able to add 14C data one at a time, and adjust the posterior set of chronologies accordingly.
This is what we need:
compute age probability distribution for any depth unit conditional on the age of the previous depth unit or the age of the next depth unit
do this for each possible age in the unit we are conditioning on, scale the probabilities by the probability of that particular age
sum all resulting distributions (age-wise) and normalize
compare this distribution to the theoretical distribution from a poisson process
do the same using a calibrated 14C age distribution times the poisson prior
If this works fine we could propagate the 14C uncertainty up and down the age model without recurring to MCMC sampling.
Five from four - first attempt
Let’s try with a simple poisson process and see if we can compute the distribution for time \(t=5\) from \(t=4\):
Differences are pretty small, all.equal
wont consider them after normalization (output from dpois
doesn’t add up to one):
par(mfrow = c(2, 1))
# test equality
identical(five_ff, five_th)
[1] FALSE
all.equal(five_ff, five_th)
[1] "Mean relative difference: 6.401264e-05"
all.equal(five_ff, normalize(five_th))
[1] TRUE
plot(five_ff - five_th, type = 'b')
plot(five_ff - normalize(five_th), type = 'b')
Logspace
This seems more like a precission issue than an approximation one. Lets try using the logspace to deal with small numbers. We will first need a function to sum the log-probabilities instead of rowSums
. I am taking this one form this stackoverflow answer. The function uses recursion to sum vectors. It expects a vector of log-probabilities:
# a function to sum log-probabilities
= function (x) {
log_sum if (length(x) == 1) return(x)
if (length(x) == 2) return(max(x) + log1p(exp(-abs(diff(x)))))
= x[1:2]
this = x[-c(1:2)]
other = c(log_sum(this), other)
out return(log_sum(out))
}
Now lets rewrite the whole computation using the log-probabilities instead:
# five from four
= {
five_ff = sapply(age_seq, function (x) {
tmp dpois(age_seq - x, lambda = lambda, log = TRUE) + dpois(x, lambda = 4 * lambda, log = TRUE)
})= apply(tmp, 1, log_sum)
tmp = normalize(exp(tmp))
tmp
tmp
}rm(tmp)
# plot
par(mfrow = c(3, 1))
plot(age_seq, five_ff, type = 'h')
plot(age_seq, five_th, type = 'h')
plot(age_seq, four_th, type = 'h')
# test equality
par(mfrow = c(2, 1))
identical(five_ff, five_th)
[1] FALSE
all.equal(five_ff, five_th)
[1] "Mean relative difference: 6.401264e-05"
all.equal(five_ff, normalize(five_th))
[1] TRUE
plot(five_ff - five_th, type = 'b')
plot(five_ff - normalize(five_th), type = 'b')
No difference… bollocks
Convolution (bad implementation?)
Lets use the convolution to get \(t=5\) as the sum of \(t=4\) and a poisson r.v. with \(\lambda = 1\):
= sapply(age_seq, function (x) {
five_ff sum(dpois(x, lambda = 4 * lambda) * dpois(age_seq - x, lambda = lambda))
})
# plot
par(mfrow = c(3, 1))
plot(age_seq, five_ff, type = 'h')
plot(age_seq, five_th, type = 'h')
plot(age_seq, four_th, type = 'h')
# test equality
par(mfrow = c(2, 1))
identical(five_ff, five_th)
[1] FALSE
all.equal(five_ff, five_th)
[1] "Mean relative difference: 1.418433"
all.equal(five_ff, normalize(five_th))
[1] "Mean relative difference: 1.418479"
plot(five_ff - five_th, type = 'b')
plot(five_ff - normalize(five_th), type = 'b')
Melvin Dale’s convolution for discrete independent r.v.s
Now let’s try Melvin Dale’s (1979) version for discrete random variables. Let \(Z = X + Y\), all r.v.s with pmfs \(F_X; F_Y\) and where \(P(X = i) = a_i; P(Y = i) = b_i; P(Z = i) = c_i\)
\[ c_i = a_0b_i + a_1b_{i-1} + ... + a_{i -1}b_i + a_ib_0\]
= sapply(age_seq, function (x) {
five_ff sum(dpois(0:x, lambda = 4 * lambda) * rev(dpois(0:x, lambda = lambda)))
})
# plot
par(mfrow = c(3, 1))
plot(age_seq, five_ff, type = 'h')
plot(age_seq, five_th, type = 'h')
plot(age_seq, four_th, type = 'h')
# test equality
par(mfrow = c(2, 1))
identical(five_ff, five_th)
[1] FALSE
all.equal(five_ff, five_th)
[1] TRUE
all.equal(five_ff, normalize(five_th))
[1] "Mean relative difference: 6.401674e-05"
plot(five_ff - five_th, type = 'b')
plot(five_ff - normalize(five_th), type = 'b')
Same approximation now using the logspace:
= {
five_ff = sapply(age_seq, function (x) {
tmp log_sum(dpois(0:x, lambda = 4 * lambda, log = TRUE) +
rev(dpois(0:x, lambda = lambda, log = TRUE)))
})exp(tmp)
}
rm(tmp)
# plot
par(mfrow = c(3, 1))
plot(age_seq, five_ff, type = 'h')
plot(age_seq, five_th, type = 'h')
plot(age_seq, four_th, type = 'h')
# test equality
par(mfrow = c(2, 1))
identical(five_ff, five_th)
[1] FALSE
all.equal(five_ff, five_th)
[1] TRUE
all.equal(five_ff, normalize(five_th))
[1] "Mean relative difference: 6.401674e-05"
plot(five_ff - five_th, type = 'b')
plot(five_ff - normalize(five_th), type = 'b')
# reset graphical parameters
par(opar)
Again, bollocks… maybe these differences are not that important?
A realistic example
Let’s assume that the differences for the Melvin Dale in logspace approach are due to computational precission (this are the smallest) and explore how would this work for real data. At this point, we are only able to cumpute probabilities for following steps within the process (sums), yet we would need to be able to compute these backwards also. In order to do so we need to learn how to cumpute the probability distribution for a difference.
From Melvin Dale, the pdf of the difference \(Z = X - Y\) is also the fourier convolution of the pdfs \(F_X\) and \(F_Y\):
\[ P(Z = k) = \sum F_X(k + y)F_Y(y) \]
If we find an expansion form for this similar to the one we used for the sum of two r.v.s we should be ok. Following the same logic and taking advantage of the discreteness of the r.v.s, say \(Z = X - Y; P(X = i) = a_i; P(Y = i) = b_i; P(Z = i) = c_i\). Here we are thinking of each random variable as a colection of values and associated probabilities such that \(X = [x_0, x_1, ..., x_i, ..., x_n]; P(X = x_i = i) = a_i\). We want to sum the probabilities associated with each pair of values for \(X\) and \(Y\) that would yield a particular \(z_i\):
\[ \begin{align} c_0 = a_0b_0 + a_1b_1 + a_2b_2 + ... + a_nb_n \\ c_1 = a_1b_0 + a_2b_1 + a_3b_2 + ... + a_nb_{n - 1} \\ c_2 = a_2b_0 + a_3b_1 + a_4b_2 + ... + a_nb_{n - 2} \\ \vdots \\ c_i = a_ib_0 + ... + a_nb_{n - i} \\ \vdots \\ c_{n - 1} = a_{n - 1}b_0 + a_nb_1 \\ c_n = a_nb_0 \end{align} \]
Let’s try to estimate \(t = 4\) from \(t = 5\):
= {
four_ff = sapply(age_seq, function (x) {
tmp log_sum(dpois(x:grid_size, lambda = 5 * lambda, log = TRUE) +
dpois(0:(grid_size - x), lambda = lambda, log = TRUE))
})exp(tmp)
}
rm(tmp)
# plot
par(mfrow = c(3, 1))
plot(age_seq, five_th, type = 'h')
plot(age_seq, four_ff, type = 'h')
plot(age_seq, four_th, type = 'h')
# test equality
par(opar)
identical(four_ff, four_th)
[1] FALSE
all.equal(four_ff, four_th)
[1] "Mean relative difference: 0.195754"
all.equal(five_ff, normalize(five_th))
[1] "Mean relative difference: 6.401674e-05"
plot(four_ff - four_th, type = 'b')
Seems like there is no way to estimate the previous step without incresing uncentainty… Maybe that’s ok, we shall try with a realistic example and see what happens.