# -*- Text -*- for GNU Emacs ############################################################################## # R code for setting up coupling from the past (CFTP) with Hamiltonian Monte # Carlo (HMC). This script comprises the functions "setglobal" to set # global variables, "setrandom" to assign random numbers, "chooserandom" to # select a subset of random numbers, and "setstart" to generate starting # points. # Author: George Leigh # Copyright 2021 George Leigh # Associated journal article: Leigh, G. M. and Northrop, A. R. (2021). # "Design of Hamiltonian Monte Carlo for coalescence to near-perfect # simulation of general continuous distributions". # Citation of the journal article in any publication that uses the algorithms # published in it would be greatly appreciated by the authors. # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published # by the Free Software Foundation, either version 2 of the License, # or (at your option) any later version. # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # You should have received a copy of the GNU General Public License # along with this program. If not, see # . ############################################################################## ######################################## Set global variables, which are not # expected to change throughout the running of this code for the results # shown in the article. setglobal = function(d = 1) { nrunglobal <<- 20 # Number of independent runs (sets of random numbers) maxpoint <<- 1000 # Maximum number of points that we are allowed to construct # on each side of a trajectory CoalesceTol <<- 1.0e-6 # Tolerance for coalescence: purely to cover numerical # round-off error: coalescence is exact if the computer's arithmetic is. # The numerical errors are surprisingly large in our examples. Note that # the target distribution is intended to be set to have unit variance, so # this tolerance is the fraction of a standard deviation. # Random-number column indices jcount = 0 jcount = jcount + 1; jK <<- jcount # Kinetic energy K jcount = jcount + 1; jsel <<- jcount # Trajectory destination point jcount = jcount + 1; jMH <<- jcount # Trajectory Metropolis-Hastings test jcount = jcount + 1; jMHro <<- jcount # Rounding Metropolis-Hastings test nsing <<- jcount # Number of columns of single random numbers jmom <<- jcount + (1:d); jcount = jcount + d # Momentum vector p jdir <<- jcount + (1:d); jcount = jcount + d # Direction vector jro <<- jcount + (1:d); jcount = jcount + d # Rounding congruences ncolsim <<- jcount # Total number of columns of random numbers # Additional global variables are created in function "Hmc" in script # "Alg1_Hmc.R": # - acceptsto: Number of trajectories accepted by Metropolis-Hastings tests # - npointsto: Number of points in those trajectories } # setglobal ######################################## Set the values of the kinetic energy # parameter beta, and generate the random numbers. Note: The paper finds # that beta = 2 for all iterations works best, but that wasn't apparent # before the research was conducted! setrandom = function(d = 1, betaset = 2, alpha = 2, h = 0.05, nrun = nrunglobal, maxpast = 2000, w = 0.01, seed = 1) { # Function arguments: # - d: Number of dimensions # - betaset: Vector of values of beta to set (see description below of how # long it can be) # - alpha: Parameter to characterize the tails of the target distribution: # short-tailed distribution has alpha = 2; long-tailed distribution has # alpha < 2. See Supplementary section S2. # - h: Parameter for setting the time step (reciprocal of desired number of # points in each trajectory) # - nrun: Number of runs: typically takes the value of the global variable # "nrunglobal", unless being called by ROCFTP, in which case it should take # the value 1 because this function will be called once for each ROCFTP # block. # - maxpast: Maximum allowed number of iterations (HMC trajectories): the # random numbers will change if a different value of maxpast is used. # Values higher than 2000 (2500 and 4000) are used for some of the # multivariate t-distributions described in the article. # - w: Width for final rounding, to make coalescence exact # - seed: Random number seed (argument to function "set.seed") nrvec = nrun * maxpast # Total number of random vectors to generate betavec = rep(2, maxpast) # Initialise all beta values to 2. # Note that R's routine "rgamma" uses a variable number of random numbers # depending on its shape parameter, which is derived from beta. Therefore # the values of beta are important to the reproducibility of the random # numbers. Also for that reason, I have generated the kinetic-energy # random numbers last, after all of the others, and reversed the order of # them, with the expectation that if values of beta less than 2 are used, # they will be early in the sequence, and all the late ones will be beta = # 2 (the only value which we have found to give coalescence). # The length of betaset does not have to match that of betavec. If betaset # is shorter, it is used to set the late values of beta. The early values # remain set to 2. If betaset is longer, only the last maxpast values will # be used. betalen = length(betaset) nbetaset = min(betalen, maxpast) # Number of values to set betavec[(maxpast - nbetaset + 1):maxpast] = betaset[(betalen - nbetaset + 1):betalen] # Expand the beta vector to cover the different sets of random numbers # (runs). Note: This is only for generating the random numbers. This # function will return the unexpanded vector. betarep = rep(betavec, nrun) # Formula for the time step (see article and derivation in Supplementary # section S2). dtvec = 2 * h * alpha^(1 / alpha) * betavec^(1 / betavec - 1.0) * exp(lgamma(d / betavec) - lgamma((d - 1) / betavec + 1.0) + lgamma(d / alpha + (d - 1) / betavec + 1.0) - lgamma((d - 1) / alpha + (d - 1) / betavec + 1.0)) # Generate the random numbers. # GML 20210905: Ensure that we use a uniform distribution for the rounding # random numbers, not a normal distribution. This will make slight changes # to the values of the results, and to whether the rounded values are # accepted by the final Metropolis-Hastings test for the rounding. # Therefore there will be slight changes to the number of iterations taken # to coalesce, which means the results have to be redone :( . They will # generally improve, as some of the normal numbers would have been large # and resulted in Metropolis-Hastings rejection which may no longer occur. # A normal random variable takes two random numbers, whereas a uniform one # takes only one. We could duplicate the generation of the rounding random # numbers, in order to make minimal changes to the results from the earlier # version. However, given that the results will change anyway, it doesn't # seem worthwhile. # Zero or negative seed means use existing setting of random number seed. We # need that for ROCFTP, so as to repeat the same set of random numbers on # the next call. if (seed > 0) set.seed(seed) rsim = array(0, dim = c(nrvec, ncolsim)) # Matrix to hold the simulations # I have checked that the rgamma routine correctly uses a vector for the # shape parameter, even though its help doesn't mention that. It doesn't # work for matrices though! # We do the gamma random variables (column jK) last, because the number of # random numbers they consume depends on the beta value. I've also changed # the order of them, to preserve the late rows if the beta value for them # stays the same but the beta value for the early rows changes. This is a # bit complex. We have to reverse the order within each run, and do the # simulations for all the late rows (combined over all runs) before the # early ones. I do this with R's matrix function, reading by rows instead # of columns. # It is safer to do each set of columns separately, rather than # simulate multiple sets with the same function call. rsim[, jsel] = runif(nrvec) rsim[, jMH] = runif(nrvec) rsim[, jMHro] = runif(nrvec) rsim[, jmom] = rnorm(d * nrvec) # 2 random numbers per random variable rsim[, jdir] = rnorm(d * nrvec) # 2 random numbers per random variable rsim[, jro] = runif(d * nrvec) #rsim[, jro] = runif(d * nrvec) # Repeat to make the random numbers the same # as the old version of the code, so that we don't have to redo all the # results in the article (didn't work well enough so commented out). # Finally, simulate K using gamma distributions (see above for why this is # tricky). rsim[, jK] = rev(c(matrix(rgamma(nrvec, shape = d / c(matrix(rev(betarep), byrow = TRUE, ncol = maxpast, nrow = nrun))), byrow = TRUE, nrow = maxpast, ncol = nrun))) # Convert the directions of the momentum p and the direction vector b from # normal random variables to directions (unit vectors). Also multiply the # direction unit vector of p by the magnitude calculated from K. I have # used d random numbers to generate a unit vector, where it would be # possible, with a lot more work, to use only (d - 1) random numbers. My # way uses the statistical result that independent standard normal # coordinates provide a multivariate standard normal distribution, so the # distribution of its unit vector must be uniform on the unit sphere. f = function(x) x / sqrt(sum(x^2)) # The cbind function below is for when d = 1, to force it into a matrix. rsim[, jmom] = rep((betarep * rsim[, jK])^(1 / betarep), d) * t(apply(cbind(rsim[, jmom]), 1, f)) rsim[, jdir] = t(apply(cbind(rsim[, jdir]), 1, f)) # Convert the rounding congruences of q from uniform on the interval # [0, 1] to uniform on [0, w]. rsim[, jro] = w * rsim[, jro] list(beta = betavec, dt = dtvec, rsim = rsim) } # setrandom ######################################## Choose a specified number of # trajectories to simulate, which is usually less than the maximum allowed # number. The function returns simply a logical vector of which ones to # pick. chooserandom = function(npast, rlist) { # Function arguments: # - npast: Desired number of trajectories: must be less than or equal to # length(rlist$beta). # - rlist: List matching the output of "setrandom" above: components are # - - beta: Vector of beta values, of length maxpast # - - dt: Vector of time steps, of length maxpast # - - rsim: Matrix of random numbers, number of rows = nrun * maxpast nrvec = nrow(rlist$rsim) # Total number of rows of random numbers, covering # both different trajectories and different runs maxpast = length(rlist$beta) if (nrvec %% maxpast != 0 | length(rlist$dt) != maxpast) stop( "Numbers of trajectories do not match.") if (npast > maxpast) stop("Too many trajectories requested") nrun = nrvec / maxpast lchoose = (1:maxpast) > maxpast - npast + 0.1 # Choose the last npast # trajectories. list(beta = rlist$beta[lchoose], dt = rlist$dt[lchoose], rsim = rlist$rsim[rep(lchoose, times = nrun),]) } # chooserandom ######################################## Set the starting points. # Note: If I had my time again, I would set the random number seed here, to a # value other than that used above for the HMC random numbers. Here, # however, we assume that "setrandom" has been run before this function, and # we continue the random numbers from there. setstart = function(d = 1, mu = 0) { # Arguments: d = number of dimensions; mu = first-coordinate position of # second mode (first mode is assumed to be always at zero, and second mode # is assumed to be at zero in all coordinates except the first) # Set mu = 0 for unimodal distributions. dlim = min(d, 5) # Limited value of d, to keep the number of starting points # manageable nstart = 2^dlim + 1 # Number of different starting points qstart = array(0, dim = c(nstart, d)) # Initialise matrix of starting points. # Last starting point is set to zeros (intended to be the maximum likelihood # point), so there is no need to change the initial values for that one. # Deterministic starting values for the first dlim coordinates for (istart in 1:(2^dlim)) { x = istart - 1 xdig = rep(0, dlim) # Vector to store the binary digits of x (0 or 1) for (j in 1:dlim) { xdig[j] = x %% 2 x = x %/% 2 } qstart[istart, 1:dlim] = 6.0 * (2 * xdig - 1) # Convert from (0, 1) to (-1, # 1), and multiply by 6 to get extreme values. Note coordinates are # intended to have been scaled to have unit variance and zero at the # maximum-likelihood point. } # Random starting values for the other coordinates, if any if (d > dlim) for (istart in 1:(nstart - 1)) # Omit last starting point which is zero. qstart[istart, (dlim + 1):d] = 6.0 * (2 * round(runif(d - dlim)) - 1) # Adjust for a bimodal distribution to make sure that the starting points # that are in the direction of muvec are extreme relative to the second # mode. muvec = rep(0, d) # Position of second mode muvec[1] = mu # We use dot products for generality, although the generality isn't used in # the current setup (second mode is displaced from the first mode only in # the first coordinate). # GML 20200129: Ensure that we do matrix multiplication here. l = c(sum(qstart %*% cbind(muvec))) > 0 # Positive dot product of the # starting vector with muvec: then we need to make it more extreme, so that # it stays extreme with respect to the second mode at muvec. # GML 20210906: Ensure that muvec is added to rows, not columns. Use a # matrix function to make certain of it. if (sum(l) > 0) qstart[l,] = qstart[l,] + matrix(muvec, byrow = TRUE, nrow = sum(l), ncol = d) qstart } # setstart ######################################## # Code to display diagnostics after the "Hmc" function is run: numbers of # trajectories where destination point was accepted by the trajectory's # Metropolis-Hastings test, and numbers of points on those trajectories. # Comments in the "Main" R scripts about the number of points per trajectory # come from running this function. Diag = function() { x = cbind(acceptsto, npointsto) round(cbind(x, x[, 2] / x[, 1]), 2) }