# -*- Text -*- for GNU Emacs
##############################################################################
# R code for setting up coupling from the past (CFTP) with Hamiltonian Monte
#   Carlo (HMC).  This script comprises the functions "setglobal" to set
#   global variables, "setrandom" to assign random numbers, "chooserandom" to
#   select a subset of random numbers, and "setstart" to generate starting
#   points.
# Author: George Leigh
# Copyright 2021 George Leigh
# Associated journal article: Leigh, G. M. and Northrop, A. R. (2021).
#   "Design of Hamiltonian Monte Carlo for coalescence to near-perfect
#   simulation of general continuous distributions".
# Citation of the journal article in any publication that uses the algorithms
#   published in it would be greatly appreciated by the authors.

# This program is free software: you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published
#   by the Free Software Foundation, either version 2 of the License,
#   or (at your option) any later version.
# This program is distributed in the hope that it will be useful, but
#   WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
#   General Public License for more details.
# You should have received a copy of the GNU General Public License
#   along with this program.  If not, see
#   <https://www.gnu.org/licenses/>.

##############################################################################

######################################## Set global variables, which are not
#   expected to change throughout the running of this code for the results
#   shown in the article.
setglobal = function(d = 1) {
 nrunglobal <<- 20 # Number of independent runs (sets of random numbers)
 maxpoint <<- 1000 # Maximum number of points that we are allowed to construct
 #   on each side of a trajectory
 CoalesceTol <<- 1.0e-6 # Tolerance for coalescence: purely to cover numerical
 #   round-off error: coalescence is exact if the computer's arithmetic is.
 #   The numerical errors are surprisingly large in our examples.  Note that
 #   the target distribution is intended to be set to have unit variance, so
 #   this tolerance is the fraction of a standard deviation.

 # Random-number column indices
 jcount = 0
 jcount = jcount + 1; jK    <<- jcount # Kinetic energy K
 jcount = jcount + 1; jsel  <<- jcount  # Trajectory destination point
 jcount = jcount + 1; jMH   <<- jcount  # Trajectory Metropolis-Hastings test
 jcount = jcount + 1; jMHro <<- jcount  # Rounding Metropolis-Hastings test
 nsing <<- jcount # Number of columns of single random numbers
 jmom  <<- jcount + (1:d); jcount = jcount + d # Momentum vector p
 jdir  <<- jcount + (1:d); jcount = jcount + d # Direction vector
 jro   <<- jcount + (1:d); jcount = jcount + d # Rounding congruences
 ncolsim <<- jcount # Total number of columns of random numbers

 # Additional global variables are created in function "Hmc" in script
 #   "Alg1_Hmc.R":
 # - acceptsto: Number of trajectories accepted by Metropolis-Hastings tests
 # - npointsto: Number of points in those trajectories
} # setglobal

######################################## Set the values of the kinetic energy
#   parameter beta, and generate the random numbers.  Note: The paper finds
#   that beta = 2 for all iterations works best, but that wasn't apparent
#   before the research was conducted!
setrandom = function(d = 1, betaset = 2, alpha = 2, h = 0.05, nrun =
  nrunglobal, maxpast = 2000, w = 0.01, seed = 1) {
 # Function arguments:
 # - d: Number of dimensions
 # - betaset: Vector of values of beta to set (see description below of how
 #   long it can be)
 # - alpha: Parameter to characterize the tails of the target distribution:
 #   short-tailed distribution has alpha = 2; long-tailed distribution has
 #   alpha < 2.  See Supplementary section S2.
 # - h: Parameter for setting the time step (reciprocal of desired number of
 #   points in each trajectory)
 # - nrun: Number of runs: typically takes the value of the global variable
 #   "nrunglobal", unless being called by ROCFTP, in which case it should take
 #   the value 1 because this function will be called once for each ROCFTP
 #   block.
 # - maxpast: Maximum allowed number of iterations (HMC trajectories): the
 #   random numbers will change if a different value of maxpast is used.
 #   Values higher than 2000 (2500 and 4000) are used for some of the
 #   multivariate t-distributions described in the article.
 # - w: Width for final rounding, to make coalescence exact
 # - seed: Random number seed (argument to function "set.seed")
 nrvec = nrun * maxpast # Total number of random vectors to generate
 betavec = rep(2, maxpast) # Initialise all beta values to 2.
 # Note that R's routine "rgamma" uses a variable number of random numbers
 #   depending on its shape parameter, which is derived from beta.  Therefore
 #   the values of beta are important to the reproducibility of the random
 #   numbers.  Also for that reason, I have generated the kinetic-energy
 #   random numbers last, after all of the others, and reversed the order of
 #   them, with the expectation that if values of beta less than 2 are used,
 #   they will be early in the sequence, and all the late ones will be beta =
 #   2 (the only value which we have found to give coalescence).
 # The length of betaset does not have to match that of betavec.  If betaset
 #   is shorter, it is used to set the late values of beta.  The early values
 #   remain set to 2.  If betaset is longer, only the last maxpast values will
 #   be used.
 betalen = length(betaset)
 nbetaset = min(betalen, maxpast) # Number of values to set
 betavec[(maxpast - nbetaset + 1):maxpast] =
  betaset[(betalen - nbetaset + 1):betalen]
 # Expand the beta vector to cover the different sets of random numbers
 #   (runs).  Note: This is only for generating the random numbers.  This
 #   function will return the unexpanded vector.
 betarep = rep(betavec, nrun)

 # Formula for the time step (see article and derivation in Supplementary
 #   section S2).
 dtvec = 2 * h * alpha^(1 / alpha) * betavec^(1 / betavec - 1.0) *
  exp(lgamma(d / betavec) - lgamma((d - 1) / betavec + 1.0) +
   lgamma(d / alpha + (d - 1) / betavec + 1.0) -
   lgamma((d - 1) / alpha + (d - 1) / betavec + 1.0))

 # Generate the random numbers.
 # GML 20210905: Ensure that we use a uniform distribution for the rounding
 #   random numbers, not a normal distribution.  This will make slight changes
 #   to the values of the results, and to whether the rounded values are
 #   accepted by the final Metropolis-Hastings test for the rounding.
 #   Therefore there will be slight changes to the number of iterations taken
 #   to coalesce, which means the results have to be redone :( .  They will
 #   generally improve, as some of the normal numbers would have been large
 #   and resulted in Metropolis-Hastings rejection which may no longer occur.
 # A normal random variable takes two random numbers, whereas a uniform one
 #   takes only one.  We could duplicate the generation of the rounding random
 #   numbers, in order to make minimal changes to the results from the earlier
 #   version.  However, given that the results will change anyway, it doesn't
 #   seem worthwhile.
 # Zero or negative seed means use existing setting of random number seed.  We
 #   need that for ROCFTP, so as to repeat the same set of random numbers on
 #   the next call.
 if (seed > 0) set.seed(seed)
 rsim = array(0, dim = c(nrvec, ncolsim)) # Matrix to hold the simulations
 # I have checked that the rgamma routine correctly uses a vector for the
 #   shape parameter, even though its help doesn't mention that.  It doesn't
 #   work for matrices though!
 # We do the gamma random variables (column jK) last, because the number of
 #   random numbers they consume depends on the beta value.  I've also changed
 #   the order of them, to preserve the late rows if the beta value for them
 #   stays the same but the beta value for the early rows changes.  This is a
 #   bit complex.  We have to reverse the order within each run, and do the
 #   simulations for all the late rows (combined over all runs) before the
 #   early ones.  I do this with R's matrix function, reading by rows instead
 #   of columns.
 # It is safer to do each set of columns separately, rather than
 #   simulate multiple sets with the same function call.
 rsim[, jsel]  = runif(nrvec)
 rsim[, jMH]   = runif(nrvec)
 rsim[, jMHro] = runif(nrvec)
 rsim[, jmom]  = rnorm(d * nrvec) # 2 random numbers per random variable
 rsim[, jdir]  = rnorm(d * nrvec) # 2 random numbers per random variable
 rsim[, jro]   = runif(d * nrvec)
 #rsim[, jro] = runif(d * nrvec) # Repeat to make the random numbers the same
 #   as the old version of the code, so that we don't have to redo all the
 #   results in the article (didn't work well enough so commented out).

 # Finally, simulate K using gamma distributions (see above for why this is
 #   tricky).
 rsim[, jK] = rev(c(matrix(rgamma(nrvec, shape = d / c(matrix(rev(betarep),
  byrow = TRUE, ncol = maxpast, nrow = nrun))), byrow = TRUE, nrow = maxpast,
  ncol = nrun)))

 # Convert the directions of the momentum p and the direction vector b from
 #   normal random variables to directions (unit vectors).  Also multiply the
 #   direction unit vector of p by the magnitude calculated from K.  I have
 #   used d random numbers to generate a unit vector, where it would be
 #   possible, with a lot more work, to use only (d - 1) random numbers.  My
 #   way uses the statistical result that independent standard normal
 #   coordinates provide a multivariate standard normal distribution, so the
 #   distribution of its unit vector must be uniform on the unit sphere.
 f = function(x) x / sqrt(sum(x^2))
 # The cbind function below is for when d = 1, to force it into a matrix.
 rsim[, jmom] = rep((betarep * rsim[, jK])^(1 / betarep), d) *
  t(apply(cbind(rsim[, jmom]), 1, f))
 rsim[, jdir] = t(apply(cbind(rsim[, jdir]), 1, f))

 # Convert the rounding congruences of q from uniform on the interval
 #   [0, 1] to uniform on [0, w].
 rsim[, jro] = w * rsim[, jro]
 list(beta = betavec, dt = dtvec, rsim = rsim)
} # setrandom

######################################## Choose a specified number of
#   trajectories to simulate, which is usually less than the maximum allowed
#   number.  The function returns simply a logical vector of which ones to
#   pick.
chooserandom = function(npast, rlist) {
 # Function arguments:
 # - npast: Desired number of trajectories: must be less than or equal to
 #   length(rlist$beta).
 # - rlist: List matching the output of "setrandom" above: components are
 # - - beta: Vector of beta values, of length maxpast
 # - - dt: Vector of time steps, of length maxpast
 # - - rsim: Matrix of random numbers, number of rows = nrun * maxpast
 nrvec = nrow(rlist$rsim) # Total number of rows of random numbers, covering
 #   both different trajectories and different runs
 maxpast = length(rlist$beta)
 if (nrvec %% maxpast != 0 | length(rlist$dt) != maxpast) stop(
  "Numbers of trajectories do not match.")
 if (npast > maxpast) stop("Too many trajectories requested")
 nrun = nrvec / maxpast
 lchoose = (1:maxpast) > maxpast - npast + 0.1 # Choose the last npast
 #   trajectories.
 list(beta = rlist$beta[lchoose], dt = rlist$dt[lchoose],
  rsim = rlist$rsim[rep(lchoose, times = nrun),])
} # chooserandom

######################################## Set the starting points.
# Note: If I had my time again, I would set the random number seed here, to a
#   value other than that used above for the HMC random numbers.  Here,
#   however, we assume that "setrandom" has been run before this function, and
#   we continue the random numbers from there.
setstart = function(d = 1, mu = 0) {
 # Arguments: d = number of dimensions; mu = first-coordinate position of
 #   second mode (first mode is assumed to be always at zero, and second mode
 #   is assumed to be at zero in all coordinates except the first)
 # Set mu = 0 for unimodal distributions.

 dlim = min(d, 5) # Limited value of d, to keep the number of starting points
 #   manageable
 nstart = 2^dlim + 1 # Number of different starting points
 qstart = array(0, dim = c(nstart, d)) # Initialise matrix of starting points.
 # Last starting point is set to zeros (intended to be the maximum likelihood
 #   point), so there is no need to change the initial values for that one.

 # Deterministic starting values for the first dlim coordinates
 for (istart in 1:(2^dlim)) {
  x = istart - 1
  xdig = rep(0, dlim) # Vector to store the binary digits of x (0 or 1)
  for (j in 1:dlim) {
   xdig[j] = x %% 2
   x = x %/% 2
  }
  qstart[istart, 1:dlim] = 6.0 * (2 * xdig - 1) # Convert from (0, 1) to (-1,
  #   1), and multiply by 6 to get extreme values.  Note coordinates are
  #   intended to have been scaled to have unit variance and zero at the
  #   maximum-likelihood point.
 }

 # Random starting values for the other coordinates, if any
 if (d > dlim)
  for (istart in 1:(nstart - 1)) # Omit last starting point which is zero.
   qstart[istart, (dlim + 1):d] = 6.0 * (2 * round(runif(d - dlim)) - 1)

 # Adjust for a bimodal distribution to make sure that the starting points
 #   that are in the direction of muvec are extreme relative to the second
 #   mode.
 muvec = rep(0, d) # Position of second mode
 muvec[1] = mu
 # We use dot products for generality, although the generality isn't used in
 #   the current setup (second mode is displaced from the first mode only in
 #   the first coordinate).
 # GML 20200129: Ensure that we do matrix multiplication here.
 l = c(sum(qstart %*% cbind(muvec))) > 0 # Positive dot product of the
 #   starting vector with muvec: then we need to make it more extreme, so that
 #   it stays extreme with respect to the second mode at muvec.
 # GML 20210906: Ensure that muvec is added to rows, not columns.  Use a
 #   matrix function to make certain of it.
 if (sum(l) > 0) qstart[l,] =
  qstart[l,] + matrix(muvec, byrow = TRUE, nrow = sum(l), ncol = d)
 qstart
} # setstart

########################################
# Code to display diagnostics after the "Hmc" function is run: numbers of
#   trajectories where destination point was accepted by the trajectory's
#   Metropolis-Hastings test, and numbers of points on those trajectories.
# Comments in the "Main" R scripts about the number of points per trajectory
#   come from running this function.
Diag = function() {
 x = cbind(acceptsto, npointsto)
 round(cbind(x, x[, 2] / x[, 1]), 2)
}