# -*- Text -*- for GNU Emacs
##############################################################################
# R code for running near-perfect simulations from Hamiltonian Monte Carlo
# Main program for running the standard normal distribution, up to 100
#   dimensions, using the Full Random Uniform Trajectory Sampler (FRUTS)
# Author: George Leigh
# Copyright 2021 George Leigh
# Associated journal article: Leigh, G. M. and Northrop, A. R. (2021).
#   "Design of Hamiltonian Monte Carlo for coalescence to near-perfect
#   simulation of general continuous distributions".
# Citation of the journal article in any publication that uses the algorithms
#   published in it would be greatly appreciated by the authors.

# The programs in this project are free software: you can redistribute them
#   and/or modify them under the terms of the GNU General Public License as
#   published by the Free Software Foundation, either version 2 of the
#   License, or (at your option) any later version.
# The programs are distributed in the hope that they will be useful, but
#   WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
#   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
#   for more details.
# You should have received a copy of the GNU General Public License along with
#   this program.  If not, see <https://www.gnu.org/licenses/>.

# This particular script does not contain code for the algorithms in the
#   article; it only loads functions defined in other scripts.  Nor does it
#   contain any significant amount of other intellectual property.

# This script loads the functions in this project's other scripts
#   "SetupFunctions.R", "Alg1_Hmc.R", "Alg2_FrutsTrajectory.R", "Alg3_Cftp.R"
#   and "AlgS1_Rocftp.R".  It then runs those functions with various settings.
# THIS SCRIPT IS INTENDED TO BE RUN INTERACTIVELY, ONE LINE AT A TIME.
#   RUNNING IT WITH THE "source" FUNCTION WILL TAKE A LONG TIME AND WILL LOSE
#   OUTPUT THAT IS INTENDED TO BE VIEWED INTERACTIVELY.

##############################################################################
# Load the scripts referred to above.
source("SetupFunctions.R")
source("Alg1_Hmc.R")
source("Alg2_FrutsTrajectory.R")
source("Alg3_Cftp.R")
source("AlgS1_Rocftp.R")

##############################################################################
# Run the standard normal distributions documented in the article.
# Displayed output from functions is arranged so that a final column of
#   single-digit zeros denotes coalescence.  Two decimal places in the final
#   column denotes success of the rounding step (i.e., acceptance of
#   Metropolis-Hastings update) but lack of coalescence from at least one of
#   the starting points (i.e., a different final result from the previous
#   starting points).  More than two decimal places usually means that the
#   rounding step failed in at least one of the simulations, but sometimes
#   results from numerical roundoff error.

# Coalescence indicators given as comments after the function calls: when all
#   runs coalesce, no comment is given.  When they don't, the number that
#   don't is noted.  Stars mark the settings with minimum number of iterations
#   that coalesce completely (and for which the previous 10 calls with higher
#   numbers also coalesce completely), and those that coalesce on 90% (18/20)
#   of the runs (which are used to set the block length for ROCFTP).
# Runs of "Hmc" instead of "HmcRound" show the differences in the HMC runs
#   prior to rounding.  The biggest differences are noted, together with the
#   rows on which they occur.
# Comments on number of points per trajectory come from running "Diag()".
#   They apply to the HMC call immediately above the "Diag" line.

######################################## Standard normal distribution

#################### 1-D
setglobal(d = 1)
rlist = setrandom(d = 1)
qstart = setstart(d = 1)

HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(37, rlist))
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(36, rlist))
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(35, rlist))
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(34, rlist))
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(33, rlist))
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(32, rlist))
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(31, rlist))
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(30, rlist))
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(29, rlist))
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(28, rlist))
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(27, rlist)) #*
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(26, rlist)) #1
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(25, rlist)) #2
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(24, rlist)) #2
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(23, rlist)) #2
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(22, rlist))
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(21, rlist)) #2
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(20, rlist)) #1*
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(19, rlist)) #3
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(18, rlist)) #2
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(17, rlist)) #3
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(16, rlist)) #5
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(15, rlist)) #4
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(14, rlist)) #7

QS = Rocftp(UfuncNorm, UderivNorm, qstart, npast = 20, NS = 50000)
# Block  52724 , burnt in  1 , coalesced  50000 , max  4
# Max blocks between coalescence =  4
save(QS, file = "Normal_1_50000.RData")
c(mean(QS), sqrt(var(QS)))
# Mean = 0.00187 (predicted zero), predicted s.e. of mean = 1 / sqrt(50000) =
#   0.0045, good!
# S.d. = 1.00189 (predicted 1), predicted s.e. of s.d. ~ 0.5 / sqrt(49999) =
#   0.0022, good!
hist(QS, 50) # Histogram
qqnorm(QS, pch = ".", cex = 2) # QQ plot
# Serial trace plots to show that points are uncorrelated
plot(QS, cex = 0.5, pch = 3,
 main = "Standard normal, d = 1",
 xlab = "Sample number", ylab = "Sample value")
plot(QS[1:1000,], cex = 0.5, pch = 3,
 main = "Standard normal, d = 1",
 xlab = "Sample number", ylab = "Sample value")
plot(QS[1:1000,], QS[2:1001,], cex = 0.5, pch = 3,
 main = "Standard normal, d = 1",
 xlab = "Sample", ylab = "Next sample")
NS = nrow(QS)
cov(QS[1:(NS - 1),], QS[2:NS,])
# Covariance = 0.0028 (predicted zero), predicted s.e. ~ 1 / sqrt(49999) =
#   0.0045, good!

########## Save the results for the plot of number of coalesced results versus
#   number of trajectories shown in the Supplementary Material.
SNPlot = list()
for (iPlot in 1:30) SNPlot[[as.character(iPlot)]] = numeric(0) # Initialize.
for (iPlot in 1:30) {
 cat(iPlot, "\n")
 SNPlot[[as.character(iPlot)]] = HmcRound(UfuncNorm,
  UderivNorm, qstart, chooserandom(iPlot, rlist), nu = 4)
}
# Save results
save(SNPlot, file = "SNPlot.RData")

########## Plot the results for information here.  The code for the plot shown
#   in the Supplementary Material is in "Figs.R".
dset = 1
npastrocftp = 20 # From results above: number of trajectories used for ROCFTP
xcoal = as.numeric(names(SNPlot))
ycoal = rep(0, length(xcoal))
names(ycoal) = xcoal
maxabs = function(x) max(abs(x))
# Assume the definitive results are in the final element of SNPlot; i.e.,
#   components of SNPlot are in ascending order of number of trajectories.
# Use global variable CoalesceTol assigned in "SetupFunctions.R".
for (i in names(ycoal)) { # Character; don't need numeric version
 ycoal[[i]] = sum(
  apply(SNPlot[[i]] - SNPlot[[length(SNPlot)]], 1, maxabs) <= CoalesceTol)
}
plot(xcoal, ycoal, xaxs = "i", yaxs = "i", type = "l",
 xlab = "Number of trajectories into the past",
 ylab = "Number of coalesced run–starting-point combinations",
 main = paste("Standard normal, d =", dset))
lines(c(npastrocftp, npastrocftp), c(0, max(ycoal)), lty = 2)

#################### 2-D: Doesn't appear in article but we want to check it
#   against the correlated normal with rho = 0.
setglobal(d = 2)
rlist = setrandom(d = 2)
qstart = setstart(d = 2)

HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(50, rlist))
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(40, rlist))
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(39, rlist))
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(38, rlist))
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(37, rlist)) #1
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(36, rlist)) #1
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(35, rlist)) #1
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(34, rlist))
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(33, rlist))
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(32, rlist)) #1
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(31, rlist)) #2
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(30, rlist)) #2

#################### 10-D
setglobal(d = 10)
rlist = setrandom(d = 10)
qstart = setstart(d = 10)

HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(75, rlist))
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(74, rlist))
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(73, rlist))
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(72, rlist))
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(71, rlist))
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(70, rlist))
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(69, rlist)) 
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(68, rlist)) 
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(67, rlist)) 
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(66, rlist)) 
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(65, rlist))
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(64, rlist))
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(63, rlist))
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(62, rlist))
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(61, rlist)) #*
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(60, rlist)) #1
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(59, rlist)) #1
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(58, rlist)) #2
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(57, rlist)) #1
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(56, rlist)) #2
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(55, rlist)) #2
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(54, rlist)) #2
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(53, rlist)) #2*
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(52, rlist)) #3
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(51, rlist)) #4
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(50, rlist)) #3

QS = Rocftp(UfuncNorm, UderivNorm, qstart, npast = 53, NS = 1000)
# Block  1147 , burnt in  1 , coalesced  1000 , max  4 
# Max blocks between coalescence events =  4 
save(QS, file = "Normal_10_1000.RData")
apply(QS, 2, mean)
sqrt(apply(QS, 2, var))
# Predicted s.e. of mean = 1 / sqrt(1000) = 0.032, good!
# Predicted s.e. of s.d. ~ 0.5 / sqrt(999) = 0.0168, good!
hist(c(QS), 50) # Histogram
qqnorm(c(QS), pch = ".", cex = 2) # QQ plot
# Serial trace plots to show that points are uncorrelated
plot(QS[, 1], cex = 0.5, pch = 3,
 main = "Standard normal, d = 10, component 1",
 xlab = "Sample number", ylab = "Sample value")
NS = nrow(QS)
plot(QS[1:(NS - 1), 1], QS[2:NS, 1], cex = 0.5, pch = 3,
 main = "Standard normal, d = 10, component 1",
 xlab = "Sample", ylab = "Next sample")
plot(QS[1:(NS - 1), 6], QS[2:NS, 6], cex = 0.5, pch = 3,
 main = "Standard normal, d = 10, component 6",
 xlab = "Sample", ylab = "Next sample")
cov(QS[1:(NS - 1), 1], QS[2:NS, 1])
# Covariance = 0.0024 (predicted zero), predicted s.e. ~ 1 / sqrt(999) =
#   0.0032, good!

#################### 100-D
setglobal(d = 100)
rlist = setrandom(d = 100)
qstart = setstart(d = 100)

HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(120, rlist))
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(110, rlist))
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(100, rlist))
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(90, rlist))
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(80, rlist))
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(79, rlist))
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(78, rlist))
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(77, rlist))
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(76, rlist))
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(75, rlist))
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(74, rlist))
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(73, rlist)) #*
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(72, rlist)) #1
Hmc(UfuncNorm, UderivNorm, qstart, chooserandom(72, rlist)) # 38: 0.00065
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(70, rlist)) #1
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(65, rlist)) #1
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(60, rlist)) #1
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(59, rlist)) #1
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(58, rlist)) #2
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(57, rlist)) #2
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(56, rlist)) #1
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(55, rlist)) #2
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(54, rlist)) #2
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(53, rlist)) #2*
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(52, rlist)) #5
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(51, rlist)) #4
HmcRound(UfuncNorm, UderivNorm, qstart, chooserandom(50, rlist)) #7

QS = Rocftp(UfuncNorm, UderivNorm, qstart, npast = 53, NS = 1000)
# Block  1415 , burnt in  1 , coalesced  1000 , max  8 
# Max blocks between coalescence events =  8
save(QS, file = "Normal_100_1000.RData")