# -*- Text -*- for GNU Emacs ############################################################################## # R code for running near-perfect simulations from Hamiltonian Monte Carlo # Main program for running the standard normal distribution, up to 100 # dimensions, using the Full Random Uniform Trajectory Sampler (FRUTS) # Author: George Leigh # Copyright 2021 George Leigh # Associated journal article: Leigh, G. M. and Northrop, A. R. (2021). # "Design of Hamiltonian Monte Carlo for coalescence to near-perfect # simulation of general continuous distributions". # Citation of the journal article in any publication that uses the algorithms # published in it would be greatly appreciated by the authors. # The programs in this project are free software: you can redistribute them # and/or modify them under the terms of the GNU General Public License as # published by the Free Software Foundation, either version 2 of the # License, or (at your option) any later version. # The programs are distributed in the hope that they will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License # for more details. # You should have received a copy of the GNU General Public License along with # this program. If not, see . # This particular script does not contain code for the algorithms in the # article; it only loads functions defined in other scripts. Nor does it # contain any significant amount of other intellectual property. # This script loads the functions in this project's other scripts # "SetupFunctions.R", "Alg1_Hmc.R", "Alg2_FrutsTrajectory.R", "Alg3_Cftp.R" # and "AlgS1_Rocftp.R". It then runs those functions with various settings. # THIS SCRIPT IS INTENDED TO BE RUN INTERACTIVELY, ONE LINE AT A TIME. # RUNNING IT WITH THE "source" FUNCTION WILL TAKE A LONG TIME AND WILL LOSE # OUTPUT THAT IS INTENDED TO BE VIEWED INTERACTIVELY. ############################################################################## # Load the scripts referred to above. source("SetupFunctions.R") source("Alg1_Hmc.R") source("Alg2_FrutsTrajectory.R") source("Alg3_Cftp.R") source("AlgS1_Rocftp.R") ############################################################################## # Run the standard normal distributions documented in the article. # Displayed output from functions is arranged so that a final column of # single-digit zeros denotes coalescence. Two decimal places in the final # column denotes success of the rounding step (i.e., acceptance of # Metropolis-Hastings update) but lack of coalescence from at least one of # the starting points (i.e., a different final result from the previous # starting points). More than two decimal places usually means that the # rounding step failed in at least one of the simulations, but sometimes # results from numerical roundoff error. # Coalescence indicators given as comments after the function calls: when all # runs coalesce, no comment is given. When they don't, the number that # don't is noted. Stars mark the settings with minimum number of iterations # that coalesce completely (and for which the previous 10 calls with higher # numbers also coalesce completely), and those that coalesce on 90% (18/20) # of the runs (which are used to set the block length for ROCFTP). # Runs of "Hmc" instead of "HmcRound" show the differences in the HMC runs # prior to rounding. The biggest differences are noted, together with the # rows on which they occur. # Comments on number of points per trajectory come from running "Diag()". # They apply to the HMC call immediately above the "Diag" line. ######################################## Standard normal distribution #################### 1-D setglobal(d = 1) rlist = setrandom(d = 1) qstart = setstart(d = 1) HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(37, rlist)) HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(36, rlist)) HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(35, rlist)) HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(34, rlist)) HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(33, rlist)) HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(32, rlist)) HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(31, rlist)) HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(30, rlist)) HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(29, rlist)) HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(28, rlist)) HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(27, rlist)) #* HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(26, rlist)) #1 HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(25, rlist)) #2 HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(24, rlist)) #2 HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(23, rlist)) #2 HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(22, rlist)) HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(21, rlist)) #2 HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(20, rlist)) #1* HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(19, rlist)) #3 HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(18, rlist)) #2 HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(17, rlist)) #3 HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(16, rlist)) #5 HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(15, rlist)) #4 HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(14, rlist)) #7 QS = Rocftp(UfuncNorm, UderivNorm, qstart, npast = 20, NS = 50000) # Block 52724 , burnt in 1 , coalesced 50000 , max 4 # Max blocks between coalescence = 4 save(QS, file = "Normal_1_50000.RData") c(mean(QS), sqrt(var(QS))) # Mean = 0.00187 (predicted zero), predicted s.e. of mean = 1 / sqrt(50000) = # 0.0045, good! # S.d. = 1.00189 (predicted 1), predicted s.e. of s.d. ~ 0.5 / sqrt(49999) = # 0.0022, good! hist(QS, 50) # Histogram qqnorm(QS, pch = ".", cex = 2) # QQ plot # Serial trace plots to show that points are uncorrelated plot(QS, cex = 0.5, pch = 3, main = "Standard normal, d = 1", xlab = "Sample number", ylab = "Sample value") plot(QS[1:1000,], cex = 0.5, pch = 3, main = "Standard normal, d = 1", xlab = "Sample number", ylab = "Sample value") plot(QS[1:1000,], QS[2:1001,], cex = 0.5, pch = 3, main = "Standard normal, d = 1", xlab = "Sample", ylab = "Next sample") NS = nrow(QS) cov(QS[1:(NS - 1),], QS[2:NS,]) # Covariance = 0.0028 (predicted zero), predicted s.e. ~ 1 / sqrt(49999) = # 0.0045, good! ########## Save the results for the plot of number of coalesced results versus # number of trajectories shown in the Supplementary Material. SNPlot = list() for (iPlot in 1:30) SNPlot[[as.character(iPlot)]] = numeric(0) # Initialize. for (iPlot in 1:30) { cat(iPlot, "\n") SNPlot[[as.character(iPlot)]] = HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(iPlot, rlist), nu = 4) } # Save results save(SNPlot, file = "SNPlot.RData") ########## Plot the results for information here. The code for the plot shown # in the Supplementary Material is in "Figs.R". dset = 1 npastrocftp = 20 # From results above: number of trajectories used for ROCFTP xcoal = as.numeric(names(SNPlot)) ycoal = rep(0, length(xcoal)) names(ycoal) = xcoal maxabs = function(x) max(abs(x)) # Assume the definitive results are in the final element of SNPlot; i.e., # components of SNPlot are in ascending order of number of trajectories. # Use global variable CoalesceTol assigned in "SetupFunctions.R". for (i in names(ycoal)) { # Character; don't need numeric version ycoal[[i]] = sum( apply(SNPlot[[i]] - SNPlot[[length(SNPlot)]], 1, maxabs) <= CoalesceTol) } plot(xcoal, ycoal, xaxs = "i", yaxs = "i", type = "l", xlab = "Number of trajectories into the past", ylab = "Number of coalesced run–starting-point combinations", main = paste("Standard normal, d =", dset)) lines(c(npastrocftp, npastrocftp), c(0, max(ycoal)), lty = 2) #################### 2-D: Doesn't appear in article but we want to check it # against the correlated normal with rho = 0. setglobal(d = 2) rlist = setrandom(d = 2) qstart = setstart(d = 2) HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(50, rlist)) HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(40, rlist)) HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(39, rlist)) HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(38, rlist)) HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(37, rlist)) #1 HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(36, rlist)) #1 HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(35, rlist)) #1 HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(34, rlist)) HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(33, rlist)) HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(32, rlist)) #1 HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(31, rlist)) #2 HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(30, rlist)) #2 #################### 10-D setglobal(d = 10) rlist = setrandom(d = 10) qstart = setstart(d = 10) HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(75, rlist)) HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(74, rlist)) HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(73, rlist)) HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(72, rlist)) HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(71, rlist)) HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(70, rlist)) HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(69, rlist)) HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(68, rlist)) HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(67, rlist)) HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(66, rlist)) HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(65, rlist)) HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(64, rlist)) HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(63, rlist)) HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(62, rlist)) HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(61, rlist)) #* HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(60, rlist)) #1 HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(59, rlist)) #1 HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(58, rlist)) #2 HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(57, rlist)) #1 HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(56, rlist)) #2 HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(55, rlist)) #2 HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(54, rlist)) #2 HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(53, rlist)) #2* HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(52, rlist)) #3 HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(51, rlist)) #4 HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(50, rlist)) #3 QS = Rocftp(UfuncNorm, UderivNorm, qstart, npast = 53, NS = 1000) # Block 1147 , burnt in 1 , coalesced 1000 , max 4 # Max blocks between coalescence events = 4 save(QS, file = "Normal_10_1000.RData") apply(QS, 2, mean) sqrt(apply(QS, 2, var)) # Predicted s.e. of mean = 1 / sqrt(1000) = 0.032, good! # Predicted s.e. of s.d. ~ 0.5 / sqrt(999) = 0.0168, good! hist(c(QS), 50) # Histogram qqnorm(c(QS), pch = ".", cex = 2) # QQ plot # Serial trace plots to show that points are uncorrelated plot(QS[, 1], cex = 0.5, pch = 3, main = "Standard normal, d = 10, component 1", xlab = "Sample number", ylab = "Sample value") NS = nrow(QS) plot(QS[1:(NS - 1), 1], QS[2:NS, 1], cex = 0.5, pch = 3, main = "Standard normal, d = 10, component 1", xlab = "Sample", ylab = "Next sample") plot(QS[1:(NS - 1), 6], QS[2:NS, 6], cex = 0.5, pch = 3, main = "Standard normal, d = 10, component 6", xlab = "Sample", ylab = "Next sample") cov(QS[1:(NS - 1), 1], QS[2:NS, 1]) # Covariance = 0.0024 (predicted zero), predicted s.e. ~ 1 / sqrt(999) = # 0.0032, good! #################### 100-D setglobal(d = 100) rlist = setrandom(d = 100) qstart = setstart(d = 100) HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(120, rlist)) HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(110, rlist)) HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(100, rlist)) HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(90, rlist)) HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(80, rlist)) HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(79, rlist)) HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(78, rlist)) HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(77, rlist)) HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(76, rlist)) HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(75, rlist)) HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(74, rlist)) HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(73, rlist)) #* HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(72, rlist)) #1 Hmc(UfuncNorm, UderivNorm, qstart, chooserandom(72, rlist)) # 38: 0.00065 HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(70, rlist)) #1 HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(65, rlist)) #1 HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(60, rlist)) #1 HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(59, rlist)) #1 HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(58, rlist)) #2 HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(57, rlist)) #2 HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(56, rlist)) #1 HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(55, rlist)) #2 HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(54, rlist)) #2 HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(53, rlist)) #2* HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(52, rlist)) #5 HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(51, rlist)) #4 HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(50, rlist)) #7 QS = Rocftp(UfuncNorm, UderivNorm, qstart, npast = 53, NS = 1000) # Block 1415 , burnt in 1 , coalesced 1000 , max 8 # Max blocks between coalescence events = 8 save(QS, file = "Normal_100_1000.RData")