From fbf5a17ed1bf844276e89da3843aee0b4ea96c4d Mon Sep 17 00:00:00 2001 From: Jon Date: Wed, 3 May 2023 21:55:22 +0200 Subject: [PATCH 01/24] Added default arguments to mjmcmc and gmjmcmc. Added some rudimentary tests for mjmcmc and gmjmcmc, which have TODO listed where we need to implement stuff. Split arg gen funcs for mjmcmc and gmjmcmc. --- NAMESPACE | 5 +- R/arguments.R | 115 ++++++++++++++++++----------- R/gmjmcmc.R | 53 +++++++------ R/likelihoods.R | 18 ++--- R/mjmcmc.R | 7 +- R/parallel.R | 2 +- man/gaussian.loglik.Rd | 2 +- man/gaussian.loglik.bic.irlssgd.Rd | 8 +- man/gen.params.gmjmcmc.Rd | 14 ++++ man/gen.params.list.Rd | 16 ---- man/gen.params.mjmcmc.Rd | 22 ++++++ man/gen.probs.gmjmcmc.Rd | 14 ++++ man/gen.probs.list.Rd | 15 ---- man/gen.probs.mjmcmc.Rd | 11 +++ man/gmjmcmc.Rd | 10 +-- man/linear.g.prior.loglik.Rd | 2 +- man/logistic.loglik.Rd | 2 +- man/mjmcmc.Rd | 2 +- tests/testthat/test_mjmcmc.R | 38 ++++++++++ vignettes/GMJMCMC-guide.Rmd | 2 +- 20 files changed, 235 insertions(+), 123 deletions(-) create mode 100644 man/gen.params.gmjmcmc.Rd delete mode 100644 man/gen.params.list.Rd create mode 100644 man/gen.params.mjmcmc.Rd create mode 100644 man/gen.probs.gmjmcmc.Rd delete mode 100644 man/gen.probs.list.Rd create mode 100644 man/gen.probs.mjmcmc.Rd create mode 100644 tests/testthat/test_mjmcmc.R diff --git a/NAMESPACE b/NAMESPACE index f4cedaa..04b17d0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -10,8 +10,9 @@ export(gauss) export(gaussian.loglik) export(gaussian.loglik.alpha) export(gaussian.loglik.bic.irlssgd) -export(gen.params.list) -export(gen.probs.list) +export(gen.params.gmjmcmc) +export(gen.params.mjmcmc) +export(gen.probs.gmjmcmc) export(gmjmcmc) export(gmjmcmc.iact) export(gmjmcmc.parallel) diff --git a/R/arguments.R b/R/arguments.R index 5cb4abe..5200863 100644 --- a/R/arguments.R +++ b/R/arguments.R @@ -3,15 +3,10 @@ # Created by: jonlachmann # Created on: 2021-02-19 -#' Generate a probability list for (G)MJMCMC +#' Generate a probability list for MJMCMC #' -#' @param transforms A list of the transformations used (to get the count), -#' default is false and gives a MJMCMC prob list. -#' -#' @export gen.probs.list -gen.probs.list <- function (transforms=F) { - ### Create a probability list for algorithm - +#' @export gen.probs.gmjmcmc +gen.probs.mjmcmc <- function () { ## Mode jumping algorithm probabilities large <- 0.05 # probability of a large jump large.kern <- c(0, 0, 0, 1) # probability for type of large jump, only allow type 1-4 @@ -23,28 +18,47 @@ gen.probs.list <- function (transforms=F) { probs <- list(large=large, large.kern=large.kern, localopt.kern=localopt.kern, random.kern=random.kern, mh=mh) + return(probs) +} + +#' Generate a probability list for GMJMCMC +#' +#' @param transforms A list of the transformations used (to get the count). +#' +#' @export gen.probs.gmjmcmc +gen.probs.gmjmcmc <- function (transforms) { + if (class(transforms) != "character") + stop("The argument transforms must be a character vector specifying the transformations.") + + # Get probs for mjmcmc + probs <- gen.probs.mjmcmc() + ## Feature generation probabilities - if (transforms[1] != FALSE) { - transcount <- length(transforms) - filter <- 0.6 # filtration threshold - gen <- rep(1/4, 4) # probability for different feature generation methods - trans <- rep(1/transcount, transcount) # probability for each different nonlinear transformation + transcount <- length(transforms) + filter <- 0.6 # filtration threshold + gen <- rep(1 / 4, 4) # probability for different feature generation methods + trans <- rep(1 / transcount, transcount) # probability for each different nonlinear transformation - probs$filter <- filter - probs$gen <- gen - probs$trans <- trans - } + probs$filter <- filter + probs$gen <- gen + probs$trans <- trans return(probs) } -#' Generate a parameter list for (G)MJMCMC +#' Generate a parameter list for MJMCMC #' #' @param data The dataset that will be used in the algorithm -#' @param G True if we want parameters for GMJMCMC, false for MJMCMC #' -#' @export gen.params.list -gen.params.list <- function (data, G=F) { +#' @return A list of parameters to use when running the MJMCMC algorithm. +#' +#' TODO: WRITE MORE +#' +#' Note that the $loglik item is an empty list, which is passed to the log likelihood function of the model, +#' intended to store parameters that the estimator function should use. +#' +#' @export gen.params.mjmcmc +gen.params.mjmcmc <- function (data) { ### Create a list of parameters for the algorithm ## Get the dimensions of the data to set parameters based on it @@ -62,32 +76,45 @@ gen.params.list <- function (data, G=F) { ## MJMCMC parameters burn_in <- 100 # TODO - large_params <- list(neigh.size=as.integer(ncov*0.35), - neigh.min=as.integer(ncov*0.25), - neigh.max=as.integer(ncov*0.45)) # Large jump parameters - random_params <- list(neigh.size=1, neigh.min=1, neigh.max=2) # Small random jump parameters - mh_params <- list(neigh.size=1, neigh.min=1, neigh.max=2) # Regular MH parameters + + # Large jump parameters + large_params <- list( + neigh.size = as.integer(ncov * 0.35), + neigh.min = as.integer(ncov * 0.25), + neigh.max = as.integer(ncov * 0.45) + ) + random_params <- list(neigh.size = 1, neigh.min = 1, neigh.max = 2) # Small random jump parameters + mh_params <- list(neigh.size = 1, neigh.min = 1, neigh.max = 2) # Regular MH parameters ## Compile the list and return params <- list(burn_in=burn_in, mh=mh_params, large=large_params, random=random_params, sa=sa_params, greedy=greedy_params, loglik=list()) - # Add GMJMCMC specific parameters - if (G) { - ## GM parameters - feat_params <- list(D = 5, L = 15, # Hard limits on feature complexity - alpha = 0, # alpha strategy (0=None, 1,2,3=strategies as per Hubin et al.) TODO: Fully Bayesian - pop.max = as.integer(ncov * 1.5), # Max features population size - keep.org = F, # Always keep original covariates in every population - prel.filter = 0, # Filtration threshold for first population (i.e. filter covariates even if keep.org=T) - keep.min = 0.8, # Minimum proportion of features to always keep [0,1] - eps = 0.05, # Inclusion probability limit for feature generation - check.col = T, # Whether the colinearity should be checked - max.proj.size = 15) # Maximum projection size - params$feat <- feat_params - params$rescale.large <- F - params$prel.filter <- NULL # Specify which covariates to keep in the first population. See Issue #15. - - } + return(params) +} + +#' Generate a parameter list for GMJMCMC +#' +#' @param data The dataset that will be used in the algorithm +#' +#' @export gen.params.gmjmcmc +gen.params.gmjmcmc <- function (data) { + # Get mjmcmc params + params <- gen.params.mjmcmc(data) + + ncov <- ncol(data) - 2 + + feat_params <- list(D = 5, L = 15, # Hard limits on feature complexity + alpha = 0, # alpha strategy (0=None, 1,2,3=strategies as per Hubin et al.) TODO: Fully Bayesian + pop.max = as.integer(ncov * 1.5), # Max features population size + keep.org = F, # Always keep original covariates in every population + prel.filter = 0, # Filtration threshold for first population (i.e. filter covariates even if keep.org=T) + keep.min = 0.8, # Minimum proportion of features to always keep [0,1] + eps = 0.05, # Inclusion probability limit for feature generation + check.col = T, # Whether the colinearity should be checked + max.proj.size = 15) # Maximum projection size + params$feat <- feat_params + params$rescale.large <- F + params$prel.filter <- NULL # Specify which covariates to keep in the first population. See Issue #15. return(params) -} \ No newline at end of file +} diff --git a/R/gmjmcmc.R b/R/gmjmcmc.R index 34bfc81..1e90991 100644 --- a/R/gmjmcmc.R +++ b/R/gmjmcmc.R @@ -17,7 +17,7 @@ NULL #' @param loglik.pi The (log) density to explore #' @param loglik.alpha The likelihood function to use for alpha calculation #' @param transforms A list of the available nonlinear transformations for feature generation -#' @param T The number of population iterations +#' @param P The number of population iterations #' @param N.init The number of iterations per population (total iterations = (T-1)*N.init+N.final) #' @param N.final The number of iterations for the final population (total iterations = (T-1)*N.init+N.final) #' @param probs A list of the various probability vectors to use @@ -25,24 +25,29 @@ NULL #' @param sub An indicator that if the likelihood is inexact and should be improved each model visit (EXPERIMENTAL!) #' #' @export gmjmcmc -gmjmcmc <- function (data, loglik.pi, loglik.alpha, transforms, T, N.init, N.final, probs, params, sub=F) { +gmjmcmc <- function (data, loglik.pi, loglik.alpha, transforms, P = 10, N.init = 100, N.final = 100, probs = NULL, params = NULL, sub = FALSE) { # Verify that the data is well-formed data <- check.data(data) + + # Generate default probabilities and parameters if there are none supplied. + if (is.null(probs)) probs <- gen.probs.gmjmcmc(transforms) + if (is.null(params)) params <- gen.params.gmjmcmc(data) + # Extract labels from column names in dataframe labels <- get.labels(data) # Set the transformations option options("gmjmcmc-transformations" = transforms) # Acceptance probability per population - accept <- vector("list", T) + accept <- vector("list", P) accept <- lapply(accept, function (x) x <- 0) # A list of populations that have been visited - S <- vector("list", T) + S <- vector("list", P) # A list of models that have been visited, refering to the populations - models <- vector("list", T) + models <- vector("list", P) # A list of all the marginal probabilities for the features, per population - marg.probs <- vector("list", T) + marg.probs <- vector("list", P) # A list of all the best marginal model likelihoods, per population - best.margs <- vector("list", T) + best.margs <- vector("list", P) # Create first population F.0 <- gen.covariates(ncol(data) - 2) @@ -53,47 +58,47 @@ gmjmcmc <- function (data, loglik.pi, loglik.alpha, transforms, T, N.init, N.fin complex <- complex.features(S[[1]]) - ### Main algorithm loop - Iterate over T different populations - for (t in seq_len(T)) { + ### Main algorithm loop - Iterate over P different populations + for (p in seq_len(P)) { # Set population iteration count - if (t != T) N <- N.init + if (p != P) N <- N.init else N <- N.final # Precalculate covariates and put them in data.t - if (t != 1) data.t <- precalc.features(data, S[[t]]) + if (p != 1) data.t <- precalc.features(data, S[[p]]) else data.t <- data # Initialize first model of population - model.cur <- as.logical(rbinom(n = length(S[[t]]), size = 1, prob = 0.5)) + model.cur <- as.logical(rbinom(n = length(S[[p]]), size = 1, prob = 0.5)) model.cur.res <- loglik.pre(loglik.pi, model.cur, complex, data.t, params$loglik) model.cur <- list(prob = 0, model = model.cur, coefs = model.cur.res$coefs, crit = model.cur.res$crit, alpha = 0) best.crit <- model.cur$crit # Reset first best criteria value # Run MJMCMC over the population - cat(paste("Population", t, "begin.")) + cat(paste("Population", p, "begin.")) mjmcmc_res <- mjmcmc.loop(data.t, complex, loglik.pi, model.cur, N, probs, params, sub) - cat(paste("\nPopulation", t, "done.\n")) + cat(paste("\nPopulation", p, "done.\n")) # Add the models visited in the current population to the model list - models[[t]] <- mjmcmc_res$models + models[[p]] <- mjmcmc_res$models # Calculate marginal likelihoods for current features - marg.probs[[t]] <- marginal.probs.renorm(c(mjmcmc_res$models, mjmcmc_res$lo.models))$probs + marg.probs[[p]] <- marginal.probs.renorm(c(mjmcmc_res$models, mjmcmc_res$lo.models))$probs # Store best marginal model probability for current population - best.margs[[t]] <- mjmcmc_res$best.crit + best.margs[[p]] <- mjmcmc_res$best.crit # Print the marginal posterior distribution of the features after MJMCMC cat(paste("\rCurrent best crit:", mjmcmc_res$best.crit, "\n")) cat("Feature importance:\n") - print.dist(marg.probs[[t]], sapply(S[[t]], print.feature, labels = labels, round = 2), probs$filter) + print.dist(marg.probs[[p]], sapply(S[[p]], print.feature, labels = labels, round = 2), probs$filter) if (params$rescale.large) prev.large <- params$large # Generate a new population of features for the next iteration (if this is not the last) - if (t != T) { - S[[t + 1]] <- gmjmcmc.transition(S[[t]], F.0, data, loglik.alpha, marg.probs[[1]], marg.probs[[t]], labels, probs, params$feat) - complex <- complex.features(S[[t + 1]]) - if (params$rescale.large) params$large <- lapply(prev.large, function(x) x * length(S[[t + 1]]) / length(S[[t]])) + if (p != P) { + S[[p + 1]] <- gmjmcmc.transition(S[[p]], F.0, data, loglik.alpha, marg.probs[[1]], marg.probs[[p]], labels, probs, params$feat) + complex <- complex.features(S[[p + 1]]) + if (params$rescale.large) params$large <- lapply(prev.large, function(x) x * length(S[[p + 1]]) / length(S[[p]])) } } # Calculate acceptance rate - accept.tot <- sum(unlist(accept)) / (N.init*(T-1)+N.final) + accept.tot <- sum(unlist(accept)) / (N.init * (P - 1) + N.final) accept <- lapply(accept, function (x) x / N.init) - accept[[T]] <- accept[[T]]*N.init/N.final + accept[[P]] <- accept[[P]] * N.init / N.final # Return formatted results results <- list(models=models, # All models per population populations=S, # All features per population diff --git a/R/likelihoods.R b/R/likelihoods.R index 775a351..1491051 100644 --- a/R/likelihoods.R +++ b/R/likelihoods.R @@ -14,7 +14,7 @@ #' @param params A list of parameters for the log likelihood, supplied by the user #' #' @export logistic.loglik -logistic.loglik <- function (y, x, model, complex, params) { +logistic.loglik <- function (y, x, model, complex, params = list(r = 1)) { suppressWarnings({mod <- fastglm(as.matrix(x[, model]), y, family = binomial())}) ret <- (-(mod$deviance -2 * log(params$r) * sum(complex$oc))) / 2 return(list(crit=ret, coefs=mod$coefficients)) @@ -43,9 +43,9 @@ logistic.loglik.alpha <- function (a, data, mu_func) { #' @param params A list of parameters for the log likelihood, supplied by the user #' #' @export gaussian.loglik -gaussian.loglik <- function (y, x, model, complex, params) { - suppressWarnings({mod <- fastglm(as.matrix(x[,model]), y, family=gaussian())}) - ret <- (-(mod$deviance + 2*log(length(y))*(mod$rank-1) - 2*log(params$r)*(sum(complex$oc))))/2 +gaussian.loglik <- function (y, x, model, complex, params = list(r = 1)) { + suppressWarnings({mod <- fastglm(as.matrix(x[, model]), y, family = gaussian())}) + ret <- (-(mod$deviance + 2 * log(length(y)) * (mod$rank - 1) - 2 * log(params$r) * (sum(complex$oc)))) / 2 return(list(crit=ret, coefs=mod$coefficients)) } @@ -73,7 +73,7 @@ gaussian.loglik.alpha <- function (a, data, mu_func) { #' @param params A list of parameters for the log likelihood, supplied by the user #' #' @export gaussian.loglik.bic.irlssgd -gaussian.loglik.bic.irlssgd <- function (y, x, model, complex, params) { +gaussian.loglik.bic.irlssgd <- function (y, x, model, complex, params = list(r = 1, subs = 0.5)) { mod <- irls.sgd(as.matrix(x[,model]), y, gaussian(), irls.control=list(subs=params$subs, maxit=20, tol=1e-7, cooling = c(1,0.9,0.75), expl = c(3,1.5,1)), sgd.control=list(subs=params$subs, maxit=250, alpha=0.001, decay=0.99, histfreq=10)) @@ -90,11 +90,11 @@ gaussian.loglik.bic.irlssgd <- function (y, x, model, complex, params) { #' @param params A list of parameters for the log likelihood, supplied by the user #' #' @export linear.g.prior.loglik -linear.g.prior.loglik <- function (y, x, model, complex, params) { - out <- lm.fit(as.matrix(x[,model]),y) - rsquared <- 1-sum(var(out$residuals))/sum(var(y)) +linear.g.prior.loglik <- function (y, x, model, complex, params = list(g = 4)) { + out <- lm.fit(as.matrix(x[, model]), y) + rsquared <- 1 - sum(var(out$residuals)) / sum(var(y)) p <- out$rank n <- nrow(x) - logmarglik <- 0.5*(log(1+params$g)*(n-p) - log(1+params$g*(1-rsquared))*(n-1))*(p!=1) + logmarglik <- 0.5 * (log(1 + params$g) * (n - p) - log(1 + params$g * (1 - rsquared)) * (n - 1)) * (p != 1) return(logmarglik) } diff --git a/R/mjmcmc.R b/R/mjmcmc.R index 9cba51c..ca5b750 100644 --- a/R/mjmcmc.R +++ b/R/mjmcmc.R @@ -15,9 +15,14 @@ #' @param sub An indicator that if the likelihood is inexact and should be improved each model visit (EXPERIMENTAL!) #' #' @export mjmcmc -mjmcmc <- function (data, loglik.pi, N, probs, params, sub=F) { +mjmcmc <- function (data, loglik.pi, N = 100, probs = NULL, params = NULL, sub = FALSE) { # Verify that data is well-formed data <- check.data(data) + + # Generate default probabilities and parameters if there are none supplied. + if (is.null(probs)) probs <- gen.probs.mjmcmc() + if (is.null(params)) params <- gen.params.mjmcmc(data) + # Acceptance probability accept <- 0 diff --git a/R/parallel.R b/R/parallel.R index 0d872c4..9576d47 100644 --- a/R/parallel.R +++ b/R/parallel.R @@ -5,7 +5,7 @@ #' @return Merged results from multiple mjmcmc runs #' @export mjmcmc.parallel <- function (runs, cores=getOption("mc.cores", 2L), ...) { - results <- mclapply(seq_len(runs), function (x) { gmjmcmc(...) }, mc.cores=cores) + results <- mclapply(seq_len(runs), function (x) { mjmcmc(...) }, mc.cores=cores) } diff --git a/man/gaussian.loglik.Rd b/man/gaussian.loglik.Rd index 24ad6f8..18ae582 100644 --- a/man/gaussian.loglik.Rd +++ b/man/gaussian.loglik.Rd @@ -4,7 +4,7 @@ \alias{gaussian.loglik} \title{Log likelihood function for gaussian regression with a prior p(m)=r*sum(total_width).} \usage{ -gaussian.loglik(y, x, model, complex, params) +gaussian.loglik(y, x, model, complex, params = list(r = 1)) } \arguments{ \item{y}{A vector containing the dependent variable} diff --git a/man/gaussian.loglik.bic.irlssgd.Rd b/man/gaussian.loglik.bic.irlssgd.Rd index 7686eb3..b7f8ddd 100644 --- a/man/gaussian.loglik.bic.irlssgd.Rd +++ b/man/gaussian.loglik.bic.irlssgd.Rd @@ -4,7 +4,13 @@ \alias{gaussian.loglik.bic.irlssgd} \title{Log likelihood function for gaussian regression with a prior p(m)=r*sum(total_width), using subsampling.} \usage{ -gaussian.loglik.bic.irlssgd(y, x, model, complex, params) +gaussian.loglik.bic.irlssgd( + y, + x, + model, + complex, + params = list(r = 1, subs = 0.5) +) } \arguments{ \item{y}{A vector containing the dependent variable} diff --git a/man/gen.params.gmjmcmc.Rd b/man/gen.params.gmjmcmc.Rd new file mode 100644 index 0000000..d47b75d --- /dev/null +++ b/man/gen.params.gmjmcmc.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/arguments.R +\name{gen.params.gmjmcmc} +\alias{gen.params.gmjmcmc} +\title{Generate a parameter list for GMJMCMC} +\usage{ +gen.params.gmjmcmc(data) +} +\arguments{ +\item{data}{The dataset that will be used in the algorithm} +} +\description{ +Generate a parameter list for GMJMCMC +} diff --git a/man/gen.params.list.Rd b/man/gen.params.list.Rd deleted file mode 100644 index 28da31f..0000000 --- a/man/gen.params.list.Rd +++ /dev/null @@ -1,16 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/arguments.R -\name{gen.params.list} -\alias{gen.params.list} -\title{Generate a parameter list for (G)MJMCMC} -\usage{ -gen.params.list(data, G = F) -} -\arguments{ -\item{data}{The dataset that will be used in the algorithm} - -\item{G}{True if we want parameters for GMJMCMC, false for MJMCMC} -} -\description{ -Generate a parameter list for (G)MJMCMC -} diff --git a/man/gen.params.mjmcmc.Rd b/man/gen.params.mjmcmc.Rd new file mode 100644 index 0000000..eb0acc0 --- /dev/null +++ b/man/gen.params.mjmcmc.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/arguments.R +\name{gen.params.mjmcmc} +\alias{gen.params.mjmcmc} +\title{Generate a parameter list for MJMCMC} +\usage{ +gen.params.mjmcmc(data) +} +\arguments{ +\item{data}{The dataset that will be used in the algorithm} +} +\value{ +A list of parameters to use when running the MJMCMC algorithm. + +TODO: WRITE MORE + +Note that the $loglik item is an empty list, which is passed to the log likelihood function of the model, +intended to store parameters that the estimator function should use. +} +\description{ +Generate a parameter list for MJMCMC +} diff --git a/man/gen.probs.gmjmcmc.Rd b/man/gen.probs.gmjmcmc.Rd new file mode 100644 index 0000000..732444e --- /dev/null +++ b/man/gen.probs.gmjmcmc.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/arguments.R +\name{gen.probs.gmjmcmc} +\alias{gen.probs.gmjmcmc} +\title{Generate a probability list for GMJMCMC} +\usage{ +gen.probs.gmjmcmc(transforms) +} +\arguments{ +\item{transforms}{A list of the transformations used (to get the count).} +} +\description{ +Generate a probability list for GMJMCMC +} diff --git a/man/gen.probs.list.Rd b/man/gen.probs.list.Rd deleted file mode 100644 index e70851d..0000000 --- a/man/gen.probs.list.Rd +++ /dev/null @@ -1,15 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/arguments.R -\name{gen.probs.list} -\alias{gen.probs.list} -\title{Generate a probability list for (G)MJMCMC} -\usage{ -gen.probs.list(transforms = F) -} -\arguments{ -\item{transforms}{A list of the transformations used (to get the count), -default is false and gives a MJMCMC prob list.} -} -\description{ -Generate a probability list for (G)MJMCMC -} diff --git a/man/gen.probs.mjmcmc.Rd b/man/gen.probs.mjmcmc.Rd new file mode 100644 index 0000000..7a103c8 --- /dev/null +++ b/man/gen.probs.mjmcmc.Rd @@ -0,0 +1,11 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/arguments.R +\name{gen.probs.mjmcmc} +\alias{gen.probs.mjmcmc} +\title{Generate a probability list for MJMCMC} +\usage{ +gen.probs.mjmcmc() +} +\description{ +Generate a probability list for MJMCMC +} diff --git a/man/gmjmcmc.Rd b/man/gmjmcmc.Rd index 74b8c8c..41f3113 100644 --- a/man/gmjmcmc.Rd +++ b/man/gmjmcmc.Rd @@ -10,12 +10,12 @@ gmjmcmc( loglik.pi, loglik.alpha, transforms, - T, + P, N.init, N.final, - probs, - params, - sub = F + probs = NULL, + params = NULL, + sub = FALSE ) } \arguments{ @@ -29,7 +29,7 @@ and the rest of the columns should be the independent variables.} \item{transforms}{A list of the available nonlinear transformations for feature generation} -\item{T}{The number of population iterations} +\item{P}{The number of population iterations} \item{N.init}{The number of iterations per population (total iterations = (T-1)*N.init+N.final)} diff --git a/man/linear.g.prior.loglik.Rd b/man/linear.g.prior.loglik.Rd index 150a75d..ccd65f2 100644 --- a/man/linear.g.prior.loglik.Rd +++ b/man/linear.g.prior.loglik.Rd @@ -4,7 +4,7 @@ \alias{linear.g.prior.loglik} \title{Log likelihood function for linear regression using Zellners g-prior} \usage{ -linear.g.prior.loglik(y, x, model, complex, params) +linear.g.prior.loglik(y, x, model, complex, params = list(g = 4)) } \arguments{ \item{y}{A vector containing the dependent variable} diff --git a/man/logistic.loglik.Rd b/man/logistic.loglik.Rd index 2fde28f..69a581f 100644 --- a/man/logistic.loglik.Rd +++ b/man/logistic.loglik.Rd @@ -6,7 +6,7 @@ This function is created as an example of how to create an estimator that is used to calculate the marginal likelihood of a model.} \usage{ -logistic.loglik(y, x, model, complex, params) +logistic.loglik(y, x, model, complex, params = list(r = 1)) } \arguments{ \item{y}{A vector containing the dependent variable} diff --git a/man/mjmcmc.Rd b/man/mjmcmc.Rd index 086bc72..b8b5782 100644 --- a/man/mjmcmc.Rd +++ b/man/mjmcmc.Rd @@ -4,7 +4,7 @@ \alias{mjmcmc} \title{Main algorithm for MJMCMC} \usage{ -mjmcmc(data, loglik.pi, N, probs, params, sub = F) +mjmcmc(data, loglik.pi, N, probs = NULL, params = NULL, sub = FALSE) } \arguments{ \item{data}{A matrix containing the data to use in the algorithm, diff --git a/tests/testthat/test_mjmcmc.R b/tests/testthat/test_mjmcmc.R new file mode 100644 index 0000000..09ae00b --- /dev/null +++ b/tests/testthat/test_mjmcmc.R @@ -0,0 +1,38 @@ +# Title : TODO +# Objective : TODO +# Created by: jonlachmann +# Created on: 2021-02-25 + +context("MJMCMC") + +test_that("Testing MJMCMC algorithm", { + # Dummy test likelihood function + loglik.tester <- function (y, x, model, complex, params) { + lmmod <- lm.fit(x[, model, drop = FALSE], y) + n <- length(y) + k <- length(lmmod$coefficients) + rss <- sum(resid(lmmod)^2) + aic <- n * log(rss / n) + 2 * k + return(list(crit = aic)) + } + + data <- matrix(rnorm(600), 100) + resm <- mjmcmc(data, loglik.tester) + # summary(resm) TODO + # plot(resm) TODO + + resg <- gmjmcmc(data, loglik.tester, NULL, c("log", "exp")) + summary(resg) + plot(resg) + # predict(resg) TODO + + respm <- mjmcmc.parallel(2, 2, data, loglik.tester) + summary(respm) + plot(respm) + # predict(resg) TODO + + respg <- gmjmcmc.parallel(2, 2, data, loglik.tester, NULL, c("log", "exp")) + # summary(respg) TODO + # plot(respg) TODO + # predict(respg) TOO: First needs to run merge +}) diff --git a/vignettes/GMJMCMC-guide.Rmd b/vignettes/GMJMCMC-guide.Rmd index f84ee8a..438d724 100644 --- a/vignettes/GMJMCMC-guide.Rmd +++ b/vignettes/GMJMCMC-guide.Rmd @@ -53,7 +53,7 @@ params$feat$alpha <- 3 We are now ready to run the algorithm, in this vignette we will only run very few iterations for demonstration purposes, but the only thing that needs to be changed are the number or populations to visit `T`, the number of iterations per population `N` and the number of iterations for the final population `N.final` ```{r} set.seed(1234) -result <- gmjmcmc(bc, loglik.example, logistic.loglik.alpha, transforms, T=3, N=30, N.final=60, probs, params) +result <- gmjmcmc(bc, loglik.example, logistic.loglik.alpha, transforms, P=3, N=30, N.final=60, probs, params) ``` We can then summarize the results using the supplied function and get a plot of the importance of the parameters in the last population of features ```{r, fig.width=6, fig.height=6} -- GitLab From bc06fb875c03fa4050118b321ab127acc1294c2b Mon Sep 17 00:00:00 2001 From: Jon Date: Wed, 3 May 2023 22:24:20 +0200 Subject: [PATCH 02/24] Break out summary and plot functions to support both mjmcmc and gmjmcmc. Change result classes to be just mjmcmc and gmjmcmc. --- NAMESPACE | 8 +++---- R/gmjmcmc.R | 2 +- R/gmjmcmc_support.R | 29 ------------------------ R/mjmcmc.R | 3 +++ R/results.R | 44 ++++++++++++++++++++++++++++-------- man/gmjmcmc.Rd | 6 ++--- man/mjmcmc.Rd | 2 +- man/plot.gmjmcmcresult.Rd | 20 ---------------- man/print.model.Rd | 11 --------- man/summary.gmjmcmcresult.Rd | 16 ------------- man/summary.gmjresult.Rd | 16 ------------- tests/testthat/test_mjmcmc.R | 4 ++-- 12 files changed, 49 insertions(+), 112 deletions(-) delete mode 100644 man/plot.gmjmcmcresult.Rd delete mode 100644 man/print.model.Rd delete mode 100644 man/summary.gmjmcmcresult.Rd delete mode 100644 man/summary.gmjresult.Rd diff --git a/NAMESPACE b/NAMESPACE index 04b17d0..5900c54 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,9 +1,10 @@ # Generated by roxygen2: do not edit by hand -S3method(plot,gmjmcmcresult) +S3method(plot,gmjmcmc) +S3method(plot,mjmcmc) S3method(print,feature) -S3method(print,model) -S3method(summary,gmjmcmcresult) +S3method(summary,gmjmcmc) +S3method(summary,mjmcmc) export(cosi) export(expi) export(gauss) @@ -30,7 +31,6 @@ export(set.transforms) export(sigmoid) export(sini) export(sqroot) -export(summary.gmjresult) export(to25) export(to35) export(troot) diff --git a/R/gmjmcmc.R b/R/gmjmcmc.R index 1e90991..e7c127c 100644 --- a/R/gmjmcmc.R +++ b/R/gmjmcmc.R @@ -107,7 +107,7 @@ gmjmcmc <- function (data, loglik.pi, loglik.alpha, transforms, P = 10, N.init = accept=accept, # Acceptance rate per population accept.tot=accept.tot, # Overall acceptance rate best=max(unlist(best.margs))) # Best marginal model probability throughout the run - attr(results, "class") <- "gmjmcmcresult" + attr(results, "class") <- "gmjmcmc" return(results) } diff --git a/R/gmjmcmc_support.R b/R/gmjmcmc_support.R index 53e4835..6113835 100644 --- a/R/gmjmcmc_support.R +++ b/R/gmjmcmc_support.R @@ -95,35 +95,6 @@ loglik.pre <- function (loglik.pi, model, complex, data, params) { return(model.res) } -#' Summarize results from GMJMCMC -#' -#' @param results The results from GMJMCMC -#' @param populations A list of the populations to include in the summary, defaults to the last one -#' -#' @export summary.gmjresult -summary.gmjresult <- function (results, population = "last") { - if (population == "last") pops <- length(results$models) - else pops <- population - feature_strings <- vector("list", length(results$populations[[pops]])) - for (i in seq_along(feature_strings)) { - feature_strings[[i]] <- print.feature(results$populations[[pops]][[i]], round = 2) - } - feature_importance <- marginal.probs.renorm(results$models[[pops]])$probs - return(list(features=feature_strings, importance=feature_importance)) -} - -#' Function to print all features in a model -#' -#' @export -print.model <- function (model, features) { - # Create a list to store the features in - model_print <- vector("list", sum(model$model)) - for (i in seq_along(model$model)) { - if (model$model[i]) model_print[[i]] <- print.feature(features[[i]]) - } - return(model_print) -} - # Function to check the data # Checks that there is an intercept in the data, adds it if missing # Coerces the data to be of type matrix diff --git a/R/mjmcmc.R b/R/mjmcmc.R index ca5b750..45b9c2f 100644 --- a/R/mjmcmc.R +++ b/R/mjmcmc.R @@ -41,7 +41,10 @@ mjmcmc <- function (data, loglik.pi, N = 100, probs = NULL, params = NULL, sub = # Calculate acceptance rate result$accept <- result$accept / N result$populations <- S + + result$marg.probs <- marginal.probs.renorm(c(result$models, result$lo.models))$probs # Return formatted results + class(result) <- "mjmcmc" return(result) } diff --git a/R/results.R b/R/results.R index 9d0a773..878a1a7 100644 --- a/R/results.R +++ b/R/results.R @@ -126,18 +126,27 @@ model.string <- function (model, features, link) { #' @param pop The population to print for, defaults to last #' #' @export -summary.gmjmcmcresult <- function (results, pop = "last") { +summary.gmjmcmc <- function (results, pop = "last") { if (pop == "last") pop <- length(results$models) + summary.mjmcmc(list(best = results$best, models = results$models[[pop]], populations = results$populations[[pop]])) +} + +#' Function to print a quick summary of the results +#' +#' @param results The results to use +#' +#' @export +summary.mjmcmc <- function (results) { # Get features as strings for printing - feats.strings <- sapply(results$populations[[pop]], print.feature, round = 2) + feats.strings <- sapply(results$populations, print.feature, round = 2) # Get marginal posterior of features - marg.probs <- marginal.probs.renorm(results$models[[pop]])$probs + marg.probs <- marginal.probs.renorm(results$models)$probs # Print the final distribution cat(" Importance | Feature\n") print.dist(marg.probs, feats.strings, -1) # Print the best marginal likelihood cat("\nBest marginal likelihood: ", results$best, "\n") - + ord.marg <- order(marg.probs[1,],decreasing = T) return(data.frame(feats.strings = feats.strings[ord.marg], marg.probs = marg.probs[1,ord.marg])) } @@ -150,9 +159,26 @@ summary.gmjmcmcresult <- function (results, pop = "last") { #' @param pop The population to plot, defaults to last #' #' @export -plot.gmjmcmcresult <- function (results, count="all", pop="last") { - if (pop=="last") pop <- length(results$populations) +plot.gmjmcmc <- function (results, count="all", pop="last") { + if (pop == "last") pop <- length(results$populations) + if (is.null(results$populations)) { + pops <- results$features + marg.probs <- results$marg.probs + } else { + pops <- results$populations[[pop]] + marg.probs <- results$marg.probs[[pop]] + } + plot.mjmcmc(list(populations = pops, marg.probs = marg.probs), count) +} +#' Function to plot the results, works both for results from gmjmcmc and +#' merged results from merge.results +#' +#' @param results The results to use +#' @param count The number of features to plot, defaults to all +#' +#' @export +plot.mjmcmc <- function (results, count="all") { ## Get features as strings for printing and marginal posteriors # If this is a merged results the structure is one way if (is.null(results$populations)) { @@ -161,8 +187,8 @@ plot.gmjmcmcresult <- function (results, count="all", pop="last") { marg.probs <- results$marg.probs } # If this is a result that is not merged, it is another way else { - feats.strings <- sapply(results$populations[[pop]], print) - marg.probs <- results$marg.probs[[pop]] + feats.strings <- sapply(results$populations, print) + marg.probs <- results$marg.probs } # Plot the distribution @@ -172,4 +198,4 @@ plot.gmjmcmcresult <- function (results, count="all", pop="last") { if (count=="all") count <- tot y <- barplot(marg.probs[(tot-count+1):tot], horiz=T, xlab="Marginal probability", ylab="Feature") text((max(marg.probs[(tot-count+1):tot])/2), y, feats.strings[(tot-count+1):tot]) -} \ No newline at end of file +} diff --git a/man/gmjmcmc.Rd b/man/gmjmcmc.Rd index 41f3113..4c913d8 100644 --- a/man/gmjmcmc.Rd +++ b/man/gmjmcmc.Rd @@ -10,9 +10,9 @@ gmjmcmc( loglik.pi, loglik.alpha, transforms, - P, - N.init, - N.final, + P = 10, + N.init = 100, + N.final = 100, probs = NULL, params = NULL, sub = FALSE diff --git a/man/mjmcmc.Rd b/man/mjmcmc.Rd index b8b5782..29e3867 100644 --- a/man/mjmcmc.Rd +++ b/man/mjmcmc.Rd @@ -4,7 +4,7 @@ \alias{mjmcmc} \title{Main algorithm for MJMCMC} \usage{ -mjmcmc(data, loglik.pi, N, probs = NULL, params = NULL, sub = FALSE) +mjmcmc(data, loglik.pi, N = 100, probs = NULL, params = NULL, sub = FALSE) } \arguments{ \item{data}{A matrix containing the data to use in the algorithm, diff --git a/man/plot.gmjmcmcresult.Rd b/man/plot.gmjmcmcresult.Rd deleted file mode 100644 index c11335a..0000000 --- a/man/plot.gmjmcmcresult.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/results.R -\name{plot.gmjmcmcresult} -\alias{plot.gmjmcmcresult} -\title{Function to plot the results, works both for results from gmjmcmc and -merged results from merge.results} -\usage{ -\method{plot}{gmjmcmcresult}(results, count = "all", pop = "last") -} -\arguments{ -\item{results}{The results to use} - -\item{count}{The number of features to plot, defaults to all} - -\item{pop}{The population to plot, defaults to last} -} -\description{ -Function to plot the results, works both for results from gmjmcmc and -merged results from merge.results -} diff --git a/man/print.model.Rd b/man/print.model.Rd deleted file mode 100644 index 27f98f3..0000000 --- a/man/print.model.Rd +++ /dev/null @@ -1,11 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/gmjmcmc_support.R -\name{print.model} -\alias{print.model} -\title{Function to print all features in a model} -\usage{ -\method{print}{model}(model, features) -} -\description{ -Function to print all features in a model -} diff --git a/man/summary.gmjmcmcresult.Rd b/man/summary.gmjmcmcresult.Rd deleted file mode 100644 index ceb1411..0000000 --- a/man/summary.gmjmcmcresult.Rd +++ /dev/null @@ -1,16 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/results.R -\name{summary.gmjmcmcresult} -\alias{summary.gmjmcmcresult} -\title{Function to print a quick summary of the results} -\usage{ -\method{summary}{gmjmcmcresult}(results, pop = "last") -} -\arguments{ -\item{results}{The results to use} - -\item{pop}{The population to print for, defaults to last} -} -\description{ -Function to print a quick summary of the results -} diff --git a/man/summary.gmjresult.Rd b/man/summary.gmjresult.Rd deleted file mode 100644 index db8ccab..0000000 --- a/man/summary.gmjresult.Rd +++ /dev/null @@ -1,16 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/gmjmcmc_support.R -\name{summary.gmjresult} -\alias{summary.gmjresult} -\title{Summarize results from GMJMCMC} -\usage{ -\method{summary}{gmjresult}(results, population = "last") -} -\arguments{ -\item{results}{The results from GMJMCMC} - -\item{populations}{A list of the populations to include in the summary, defaults to the last one} -} -\description{ -Summarize results from GMJMCMC -} diff --git a/tests/testthat/test_mjmcmc.R b/tests/testthat/test_mjmcmc.R index 09ae00b..42a8476 100644 --- a/tests/testthat/test_mjmcmc.R +++ b/tests/testthat/test_mjmcmc.R @@ -18,8 +18,8 @@ test_that("Testing MJMCMC algorithm", { data <- matrix(rnorm(600), 100) resm <- mjmcmc(data, loglik.tester) - # summary(resm) TODO - # plot(resm) TODO + summary(resm) + plot(resm) resg <- gmjmcmc(data, loglik.tester, NULL, c("log", "exp")) summary(resg) -- GitLab From c279bf50723eb0b820c168e07e946d1330082503 Mon Sep 17 00:00:00 2001 From: Jon Date: Wed, 3 May 2023 22:29:40 +0200 Subject: [PATCH 03/24] wip --- R/parallel.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/parallel.R b/R/parallel.R index 9576d47..070dbd7 100644 --- a/R/parallel.R +++ b/R/parallel.R @@ -4,8 +4,8 @@ #' @param ... Parameters to pass to mjmcmc #' @return Merged results from multiple mjmcmc runs #' @export -mjmcmc.parallel <- function (runs, cores=getOption("mc.cores", 2L), ...) { - results <- mclapply(seq_len(runs), function (x) { mjmcmc(...) }, mc.cores=cores) +mjmcmc.parallel <- function (runs, cores = getOption("mc.cores", 2L), ...) { + results <- mclapply(seq_len(runs), function (x) { mjmcmc(...) }, mc.cores = cores) } @@ -15,6 +15,6 @@ mjmcmc.parallel <- function (runs, cores=getOption("mc.cores", 2L), ...) { #' @param ... Parameters to pass to gmjmcmc #' @return Results from multiple gmjmcmc runs #' @export -gmjmcmc.parallel <- function (runs, cores=getOption("mc.cores", 2L), ...) { - results <- mclapply(seq_len(runs), function (x) { gmjmcmc(...) }, mc.cores=cores) +gmjmcmc.parallel <- function (runs, cores = getOption("mc.cores", 2L), ...) { + results <- mclapply(seq_len(runs), function (x) { gmjmcmc(...) }, mc.cores = cores) } -- GitLab From bc1b0c0976379f8db8244ff02f0ac45c7e66fc00 Mon Sep 17 00:00:00 2001 From: Jon Date: Tue, 23 May 2023 21:47:50 +0200 Subject: [PATCH 04/24] Much more predict functions implemented. May need some refactoring eventually. --- NAMESPACE | 4 +++ R/gmjmcmc.R | 33 ++++++++++++----- R/gmjmcmc_support.R | 21 +++++++---- R/mjmcmc.R | 15 ++++++-- R/parallel.R | 4 +++ R/predict.R | 68 +++++++++++++++++++++++++++++++++++- R/results.R | 2 +- tests/testthat/test_mjmcmc.R | 13 +++---- 8 files changed, 135 insertions(+), 25 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 5900c54..18ce70f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,6 +2,10 @@ S3method(plot,gmjmcmc) S3method(plot,mjmcmc) +S3method(predict,gmjmcmc) +S3method(predict,gmjmcmc_parallel) +S3method(predict,mjmcmc) +S3method(predict,mjmcmc_parallel) S3method(print,feature) S3method(summary,gmjmcmc) S3method(summary,mjmcmc) diff --git a/R/gmjmcmc.R b/R/gmjmcmc.R index e7c127c..5cacd0a 100644 --- a/R/gmjmcmc.R +++ b/R/gmjmcmc.R @@ -44,8 +44,13 @@ gmjmcmc <- function (data, loglik.pi, loglik.alpha, transforms, P = 10, N.init = S <- vector("list", P) # A list of models that have been visited, refering to the populations models <- vector("list", P) + lo.models <- vector("list", P) # A list of all the marginal probabilities for the features, per population marg.probs <- vector("list", P) + # A list of all the marginal probabilities for the models, per population + model.probs <- vector("list", P) + # A list of all the indices of the models which the marginal probabilities for the models refer to, per population + model.probs.idx <- vector("list", P) # A list of all the best marginal model likelihoods, per population best.margs <- vector("list", P) @@ -79,8 +84,13 @@ gmjmcmc <- function (data, loglik.pi, loglik.alpha, transforms, P = 10, N.init = # Add the models visited in the current population to the model list models[[p]] <- mjmcmc_res$models - # Calculate marginal likelihoods for current features - marg.probs[[p]] <- marginal.probs.renorm(c(mjmcmc_res$models, mjmcmc_res$lo.models))$probs + lo.models[[p]] <- mjmcmc_res$lo.models + # Store marginal likelihoods for current features + marg.probs[[p]] <- mjmcmc_res$marg.probs + # Store marginal likelihoods for the visited models + model.probs[[p]] <- mjmcmc_res$model.probs + # Store indices for which the marginal likelihoods for the visited models refer to + model.probs.idx[[p]] <- mjmcmc_res$model.probs.idx # Store best marginal model probability for current population best.margs[[p]] <- mjmcmc_res$best.crit # Print the marginal posterior distribution of the features after MJMCMC @@ -100,13 +110,18 @@ gmjmcmc <- function (data, loglik.pi, loglik.alpha, transforms, P = 10, N.init = accept <- lapply(accept, function (x) x / N.init) accept[[P]] <- accept[[P]] * N.init / N.final # Return formatted results - results <- list(models=models, # All models per population - populations=S, # All features per population - marg.probs=marg.probs, # Marginal feature probabilities per population - best.margs=best.margs, # Best marginal model probability per population - accept=accept, # Acceptance rate per population - accept.tot=accept.tot, # Overall acceptance rate - best=max(unlist(best.margs))) # Best marginal model probability throughout the run + results <- list( + models = models, # All models per population + lo.models = lo.models, # All local optim models per population + populations = S, # All features per population + marg.probs = marg.probs, # Marginal feature probabilities per population + model.probs = model.probs, # Marginal feature probabilities per population + model.probs.idx = model.probs.idx, # Marginal feature probabilities per population + best.margs = best.margs, # Best marginal model probability per population + accept = accept, # Acceptance rate per population + accept.tot = accept.tot, # Overall acceptance rate + best = max(unlist(best.margs)) # Best marginal model probability throughout the run + ) attr(results, "class") <- "gmjmcmc" return(results) } diff --git a/R/gmjmcmc_support.R b/R/gmjmcmc_support.R index 6113835..e9e5f88 100644 --- a/R/gmjmcmc_support.R +++ b/R/gmjmcmc_support.R @@ -46,7 +46,7 @@ marginal.probs <- function (models) { #' Function for calculating feature importance through renormalized model estimates #' @param models The models to use. #' @param type Select which probabilities are of interest, features or models -marginal.probs.renorm <- function (models, type="features") { +marginal.probs.renorm <- function (models, type = "features") { models <- lapply(models, function (x) x[c("model", "crit")]) model.size <- length(models[[1]]$model) models.matrix <- matrix(unlist(models), ncol = model.size + 1, byrow = TRUE) @@ -54,14 +54,23 @@ marginal.probs.renorm <- function (models, type="features") { models.matrix <- models.matrix[!duplicates, ] max_mlik <- max(models.matrix[, (model.size + 1)]) crit.sum <- sum(exp(models.matrix[, (model.size + 1)] - max_mlik)) + if (type == "features" || type == "both") { + probs.f <- matrix(NA,1, model.size) + for (i in 1:(model.size)) probs.f[i] <- sum(exp(models.matrix[as.logical(models.matrix[, i]),(model.size + 1)] - max_mlik)) / crit.sum + } + if (type == "models" || type == "both") { + probs.m <- matrix(NA,1, nrow(models.matrix)) + for (i in seq_len(nrow(models.matrix))) probs.m[i] <- exp(models.matrix[i, ncol(models.matrix)] - max_mlik) / crit.sum + } + if (type == "features") { - probs <- matrix(NA,1, model.size) - for (i in 1:(model.size)) probs[i] <- sum(exp(models.matrix[as.logical(models.matrix[, i]),(model.size + 1)] - max_mlik)) / crit.sum + result <- list(idx = which(!duplicates), probs = probs.f) } else if (type == "models") { - probs <- matrix(NA,1, nrow(models.matrix)) - for (i in seq_len(nrow(models.matrix))) probs[i] <- exp(models.matrix[i, ncol(models.matrix)] - max_mlik) / crit.sum + result <- list(idx = which(!duplicates), probs = probs.m) + } else { + result <- list(idx = which(!duplicates), probs.f = probs.f, probs.m = probs.m) } - return(list(idx = which(!duplicates), probs = probs)) + return(result) } diff --git a/R/mjmcmc.R b/R/mjmcmc.R index 45b9c2f..5ead556 100644 --- a/R/mjmcmc.R +++ b/R/mjmcmc.R @@ -42,7 +42,6 @@ mjmcmc <- function (data, loglik.pi, N = 100, probs = NULL, params = NULL, sub = result$accept <- result$accept / N result$populations <- S - result$marg.probs <- marginal.probs.renorm(c(result$models, result$lo.models))$probs # Return formatted results class(result) <- "mjmcmc" return(result) @@ -127,7 +126,19 @@ mjmcmc.loop <- function (data, complex, loglik.pi, model.cur, N, probs, params, # Add the current model to the list of visited models models[[i]] <- model.cur } - return(list(models=models, accept=accept, lo.models=lo.models, best.crit=best.crit)) + + # Calculate and store the marginal inclusion probabilities and the model probabilities + marg.probs <- marginal.probs.renorm(c(models, lo.models), type = "both") + + return(list( + models = models, + accept = accept, + lo.models = lo.models, + best.crit = best.crit, + marg.probs = marg.probs$probs.f, + model.probs = marg.probs$probs.m, + model.probs.idx = marg.probs$idx + )) } #' Subalgorithm for generating a proposal and acceptance probability in (G)MJMCMC diff --git a/R/parallel.R b/R/parallel.R index 070dbd7..dc7a274 100644 --- a/R/parallel.R +++ b/R/parallel.R @@ -6,6 +6,8 @@ #' @export mjmcmc.parallel <- function (runs, cores = getOption("mc.cores", 2L), ...) { results <- mclapply(seq_len(runs), function (x) { mjmcmc(...) }, mc.cores = cores) + class(results) <- "mjmcmc_parallel" + return(results) } @@ -17,4 +19,6 @@ mjmcmc.parallel <- function (runs, cores = getOption("mc.cores", 2L), ...) { #' @export gmjmcmc.parallel <- function (runs, cores = getOption("mc.cores", 2L), ...) { results <- mclapply(seq_len(runs), function (x) { gmjmcmc(...) }, mc.cores = cores) + class(results) <- "gmjmcmc_parallel" + return(results) } diff --git a/R/predict.R b/R/predict.R index b5130ee..113b0d0 100644 --- a/R/predict.R +++ b/R/predict.R @@ -1,3 +1,20 @@ +#' @export +predict.gmjmcmc <- function (model, x, link = function(x) x, quantiles = c(0.025, 0.5, 0.975)) { + merged <- merge.results(list(model)) + return(predict.bgnlm(merged, x, link, quantiles)) +} + +#' New idea for a more streamlined function... +#' Produces slightly different results from the fun above since this is using all lo.models too. +predict.gmjmcmc.2 <- function (model, x, link = function(x) x, quantiles = c(0.025, 0.5, 0.975), pop = 1) { + + mmodel <- lapply(model[1:8], function (x) x[[pop]]) + + # Precalculate the features for the new data (c(0,1...) is because precalc features thinks there is an intercept and y col). + x.precalc <- precalc.features(cbind(0, 1, x), mmodel$populations)[, -1] + return(predict.mjmcmc(mmodel, x.precalc, link, quantiles)) +} + #' Predict using a BGNLM model. #' #' @param model The model to use. @@ -6,7 +23,7 @@ #' @param quantiles The quantiles to calculate credible intervals for the posterior moddes (in model space). #' #' @export predict.bgnlm -predict.bgnlm <- function (model, x, link=function(x) x, quantiles=c(0.025, 0.5, 0.975)) { +predict.bgnlm <- function (model, x, link = function(x) x, quantiles = c(0.025, 0.5, 0.975)) { x <- as.matrix(x) preds <- list() for (i in seq_along(model$results)) { @@ -47,6 +64,55 @@ predict.bgnlm <- function (model, x, link=function(x) x, quantiles=c(0.025, 0.5, return(list(aggr=aggr, preds=preds)) } +#' @export +predict.mjmcmc <- function (model, x, link = function(x) x, quantiles = c(0.025, 0.5, 0.975)) { + # Select the models and features to predict from at this iteration + models <- c(model$models, model$lo.models)[model$model.probs.idx] + + yhat <- matrix(0, nrow=nrow(x), ncol=length(models)) + for (k in seq_along(models)) { + # Models which have 0 weight are skipped since they may also be invalid, and would not influence the predictions. + if (models[[k]]$crit == -.Machine$double.xmax) next + yhat[, k] <- link(x[, c(TRUE, models[[k]]$model), drop=FALSE] %*% models[[k]]$coefs) + } + + mean.pred <- rowSums(yhat %*% diag(as.numeric(model$model.probs))) + pred.quant <- apply(yhat, 1, weighted.quantiles, weights = model$model.probs, prob = quantiles) + + return(list(mean = mean.pred, quantiles = pred.quant)) +} + +#' @export +predict.mjmcmc_parallel <- function (results, x, link = function(x) x, quantiles = c(0.025, 0.5, 0.975)) { + max.crits <- numeric() + for (i in seq_along(results)) { + max.crits <- c(max.crits, results[[i]]$best.crit) + } + max.crit <- max(max.crits) + result.weights <- exp(max.crits - max.crit) / sum(exp(max.crits - max.crit)) + + preds <- list() + for (i in seq_along(results)) { + preds[[i]] <- predict.mjmcmc(results[[i]], x, link, quantiles) + } + + aggr <- list() + aggr$mean <- 0 * preds[[1]]$mean + aggr$quantiles <- 0 * preds[[1]]$quantiles + for (i in seq_along(preds)) { + aggr$mean <- aggr$mean + preds[[i]]$mean * result.weights[i] + aggr$quantiles <- aggr$quantiles + preds[[i]]$quantiles * result.weights[i] + } + + return(list(aggr = aggr, preds = preds)) +} + +#' @export +predict.gmjmcmc_parallel <- function (results, x, link = function(x) x, quantiles = c(0.025, 0.5, 0.975), ...) { + merged <- merge.results(results, ...) + predict.bgnlm(merged, x, link, quantiles) +} + #' Calculate weighted quantiles #' #' @param values The values to use diff --git a/R/results.R b/R/results.R index 878a1a7..791a3ab 100644 --- a/R/results.R +++ b/R/results.R @@ -103,7 +103,7 @@ population.weigths <- function (results, pops.use) { } max.crits <- unlist(max.crits) max.crit <- max(max.crits) - return(exp(max.crits-max.crit)/sum(exp(max.crits-max.crit))) + return(exp(max.crits-max.crit) / sum(exp(max.crits-max.crit))) } #' Function to generate a function string for a model consisting of features diff --git a/tests/testthat/test_mjmcmc.R b/tests/testthat/test_mjmcmc.R index 42a8476..d8fe835 100644 --- a/tests/testthat/test_mjmcmc.R +++ b/tests/testthat/test_mjmcmc.R @@ -13,26 +13,27 @@ test_that("Testing MJMCMC algorithm", { k <- length(lmmod$coefficients) rss <- sum(resid(lmmod)^2) aic <- n * log(rss / n) + 2 * k - return(list(crit = aic)) + return(list(crit = aic, coefs = lmmod$coefficients)) } data <- matrix(rnorm(600), 100) resm <- mjmcmc(data, loglik.tester) summary(resm) plot(resm) + predm <- predict(resm, cbind(1, data[, -1, drop = FALSE])) resg <- gmjmcmc(data, loglik.tester, NULL, c("log", "exp")) summary(resg) plot(resg) - # predict(resg) TODO + prediction <- predict(resg, cbind(1, data[, -1, drop = FALSE])) respm <- mjmcmc.parallel(2, 2, data, loglik.tester) - summary(respm) - plot(respm) - # predict(resg) TODO + summary(respm) # TODO + plot(respm) # TODO + pred_pm <- predict(respm, cbind(1, data[, -1, drop = FALSE])) respg <- gmjmcmc.parallel(2, 2, data, loglik.tester, NULL, c("log", "exp")) # summary(respg) TODO # plot(respg) TODO - # predict(respg) TOO: First needs to run merge + pred_pg <- predict(respg, cbind(1, data[, -1, drop = FALSE])) }) -- GitLab From 962c5a10aa01cbda45ee68e97a1781ca71bde6b7 Mon Sep 17 00:00:00 2001 From: Jon Date: Wed, 31 May 2023 21:50:36 +0200 Subject: [PATCH 05/24] Complete set of summary/plot/predict functions present. Cleanup, documentation and putting them in appropriate files remain. --- NAMESPACE | 6 +- R/feature.R | 4 +- R/parallel.R | 10 +- R/predict.R | 6 +- R/results.R | 99 ++++++++++++++++--- man/gmjmcmc.parallel.Rd | 11 ++- man/merge.results.Rd | 8 +- man/plot.gmjmcmc.Rd | 20 ++++ man/plot.gmjmcmc_merged.Rd | 11 +++ man/plot.mjmcmc.Rd | 18 ++++ man/plot.mjmcmc_parallel.Rd | 11 +++ man/predict.gmjmcmc.2.Rd | 19 ++++ ...ict.bgnlm.Rd => predict.gmjmcmc_merged.Rd} | 6 +- man/summary.gmjmcmc.Rd | 16 +++ man/summary.mjmcmc.Rd | 14 +++ man/summary.mjmcmc_parallel.Rd | 14 +++ tests/testthat/test_mjmcmc.R | 12 +-- 17 files changed, 250 insertions(+), 35 deletions(-) create mode 100644 man/plot.gmjmcmc.Rd create mode 100644 man/plot.gmjmcmc_merged.Rd create mode 100644 man/plot.mjmcmc.Rd create mode 100644 man/plot.mjmcmc_parallel.Rd create mode 100644 man/predict.gmjmcmc.2.Rd rename man/{predict.bgnlm.Rd => predict.gmjmcmc_merged.Rd} (74%) create mode 100644 man/summary.gmjmcmc.Rd create mode 100644 man/summary.mjmcmc.Rd create mode 100644 man/summary.mjmcmc_parallel.Rd diff --git a/NAMESPACE b/NAMESPACE index 18ce70f..fb936b3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,14 +1,19 @@ # Generated by roxygen2: do not edit by hand S3method(plot,gmjmcmc) +S3method(plot,gmjmcmc_merged) S3method(plot,mjmcmc) +S3method(plot,mjmcmc_parallel) S3method(predict,gmjmcmc) +S3method(predict,gmjmcmc_merged) S3method(predict,gmjmcmc_parallel) S3method(predict,mjmcmc) S3method(predict,mjmcmc_parallel) S3method(print,feature) S3method(summary,gmjmcmc) +S3method(summary,gmjmcmc_merged) S3method(summary,mjmcmc) +S3method(summary,mjmcmc_parallel) export(cosi) export(expi) export(gauss) @@ -30,7 +35,6 @@ export(merge.results) export(mjmcmc) export(mjmcmc.parallel) export(model.string) -export(predict.bgnlm) export(set.transforms) export(sigmoid) export(sini) diff --git a/R/feature.R b/R/feature.R index 11de0c7..ec1712e 100644 --- a/R/feature.R +++ b/R/feature.R @@ -100,12 +100,12 @@ update.alphas <- function (feature, alphas, recurse=FALSE) { #' #' @export print.feature <- function (feature, dataset = FALSE, alphas = FALSE, labels = FALSE, round = FALSE) { - transforms <- getOption("gmjmcmc-transformations") - if (is.null(transforms)) stop("Please set the gmjmcmc-transformations option to your non-linear functions (see ?set.transforms).") fString <- "" feat <- feature[[length(feature)]] # This is a more complex feature if (is.matrix(feat)) { + transforms <- getOption("gmjmcmc-transformations") + if (is.null(transforms)) stop("Please set the gmjmcmc-transformations option to your non-linear functions (see ?set.transforms).") # Assume that we are not doing multiplication op <- "+" # Add the outer transform is there is one diff --git a/R/parallel.R b/R/parallel.R index dc7a274..f49d2fd 100644 --- a/R/parallel.R +++ b/R/parallel.R @@ -17,8 +17,12 @@ mjmcmc.parallel <- function (runs, cores = getOption("mc.cores", 2L), ...) { #' @param ... Parameters to pass to gmjmcmc #' @return Results from multiple gmjmcmc runs #' @export -gmjmcmc.parallel <- function (runs, cores = getOption("mc.cores", 2L), ...) { - results <- mclapply(seq_len(runs), function (x) { gmjmcmc(...) }, mc.cores = cores) +gmjmcmc.parallel <- function (runs, cores = getOption("mc.cores", 2L), merge.options = NULL, data, loglik.pi, loglik.alpha, transforms, ...) { + options("gmjmcmc-transformations" = transforms) + results <- mclapply(seq_len(runs), function (x) { + gmjmcmc(data = data, loglik.pi = loglik.pi, loglik.alpha = loglik.alpha, transforms = transforms, ...) + }, mc.cores = cores) class(results) <- "gmjmcmc_parallel" - return(results) + merged <- merge.results(results, merge.options$populations, merge.options$complex.measure, merge.options$tol) + return(merged) } diff --git a/R/predict.R b/R/predict.R index 113b0d0..0f57bde 100644 --- a/R/predict.R +++ b/R/predict.R @@ -1,7 +1,7 @@ #' @export predict.gmjmcmc <- function (model, x, link = function(x) x, quantiles = c(0.025, 0.5, 0.975)) { merged <- merge.results(list(model)) - return(predict.bgnlm(merged, x, link, quantiles)) + return(predict.gmjmcmc_merged(merged, x, link, quantiles)) } #' New idea for a more streamlined function... @@ -22,8 +22,8 @@ predict.gmjmcmc.2 <- function (model, x, link = function(x) x, quantiles = c(0.0 #' @param link The link function to use #' @param quantiles The quantiles to calculate credible intervals for the posterior moddes (in model space). #' -#' @export predict.bgnlm -predict.bgnlm <- function (model, x, link = function(x) x, quantiles = c(0.025, 0.5, 0.975)) { +#' @export +predict.gmjmcmc_merged <- function (model, x, link = function(x) x, quantiles = c(0.025, 0.5, 0.975)) { x <- as.matrix(x) preds <- list() for (i in seq_along(model$results)) { diff --git a/R/results.R b/R/results.R index 791a3ab..dc5c614 100644 --- a/R/results.R +++ b/R/results.R @@ -15,15 +15,23 @@ #' if data is supplied it should be of the same form as is required by gmjmcmc, i.e. with both x, y and an intercept. #' #' @export merge.results -merge.results <- function (results, populations="last", complex.measure=1, tol=0, data=NULL) { +merge.results <- function (results, populations = NULL, complex.measure = NULL, tol = NULL, data = NULL) { + # Default values + if (is.null(populations)) + populations <- "last" + if (is.null(complex.measure)) + complex.measure <- 1 + if (is.null(tol)) + tol <- 0 + res.count <- length(results) # Select populations to use res.lengths <- vector("list") for (i in 1:res.count) res.lengths[[i]] <- length(results[[i]]$populations) - if (populations=="last") pops.use <- res.lengths - else if (populations=="all") pops.use <- lapply(res.lengths, function(x) 1:x) - else if (populations=="best") pops.use <- lapply(1:res.count, function(x) which.max(unlist(results[[x]]$best.marg))) + if (populations == "last") pops.use <- res.lengths + else if (populations == "all") pops.use <- lapply(res.lengths, function(x) 1:x) + else if (populations == "best") pops.use <- lapply(1:res.count, function(x) which.max(unlist(results[[x]]$best.marg))) # Get the population weigths to be able to weight the features pop.weights <- population.weigths(results, pops.use) @@ -89,9 +97,9 @@ merge.results <- function (results, populations="last", complex.measure=1, tol=0 feats.simplest.ids <- feats.simplest.ids[order(feats.map[4, feats.simplest.ids])] counts <- sapply(feats.simplest.ids, function(x) sum(feats.map[1,] == x)) feats.simplest <- features[feats.simplest.ids] - importance <- feats.map[4, feats.simplest.ids] - merged <- list(features=feats.simplest, marg.probs=importance, counts=counts, results=results) - attr(merged, "class") <- "bgnlm" + importance <- feats.map[4, feats.simplest.ids, drop = FALSE] + merged <- list(features = feats.simplest, marg.probs = importance, counts = counts, results = results) + attr(merged, "class") <- "gmjmcmc_merged" return(merged) } @@ -131,24 +139,46 @@ summary.gmjmcmc <- function (results, pop = "last") { summary.mjmcmc(list(best = results$best, models = results$models[[pop]], populations = results$populations[[pop]])) } +#' @export +summary.gmjmcmc_merged <- function (x) { + best <- max(sapply(x$results, function (y) y$best)) + feats.strings <- sapply(x$features, print) + summary_internal(best, feats.strings, x$marg.probs) +} + #' Function to print a quick summary of the results #' #' @param results The results to use #' #' @export summary.mjmcmc <- function (results) { + return(summary.mjmcmc_parallel(list(results))) +} + +#' Function to print a quick summary of the results +#' +#' @param results The results to use +#' +#' @export +summary.mjmcmc_parallel <- function (results) { # Get features as strings for printing - feats.strings <- sapply(results$populations, print.feature, round = 2) + feats.strings <- sapply(results[[1]]$populations, print.feature, round = 2) # Get marginal posterior of features - marg.probs <- marginal.probs.renorm(results$models)$probs + models <- unlist(lapply(results, function (x) x$models), recursive = FALSE) + marg.probs <- marginal.probs.renorm(models)$probs + best <- max(sapply(results, function (x) x$best)) + return(summary_internal(best, feats.strings, marg.probs)) +} + +summary_internal <- function (best, feats.strings, marg.probs) { # Print the final distribution cat(" Importance | Feature\n") print.dist(marg.probs, feats.strings, -1) # Print the best marginal likelihood - cat("\nBest marginal likelihood: ", results$best, "\n") + cat("\nBest marginal likelihood: ", best, "\n") - ord.marg <- order(marg.probs[1,],decreasing = T) - return(data.frame(feats.strings = feats.strings[ord.marg], marg.probs = marg.probs[1,ord.marg])) + ord.marg <- order(marg.probs[1, ], decreasing = T) + return(data.frame(feats.strings = feats.strings[ord.marg], marg.probs = marg.probs[1, ord.marg])) } #' Function to plot the results, works both for results from gmjmcmc and @@ -178,7 +208,7 @@ plot.gmjmcmc <- function (results, count="all", pop="last") { #' @param count The number of features to plot, defaults to all #' #' @export -plot.mjmcmc <- function (results, count="all") { +plot.mjmcmc <- function (results, count = "all") { ## Get features as strings for printing and marginal posteriors # If this is a merged results the structure is one way if (is.null(results$populations)) { @@ -191,11 +221,50 @@ plot.mjmcmc <- function (results, count="all") { marg.probs <- results$marg.probs } + marg.prob.plot(feats.strings, marg.probs, count) +} + +marg.prob.plot <- function (feats.strings, marg.probs, count = "all") { # Plot the distribution feats.strings <- feats.strings[order(marg.probs)] marg.probs <- sort(marg.probs) tot <- length(marg.probs) if (count=="all") count <- tot - y <- barplot(marg.probs[(tot-count+1):tot], horiz=T, xlab="Marginal probability", ylab="Feature") - text((max(marg.probs[(tot-count+1):tot])/2), y, feats.strings[(tot-count+1):tot]) + y <- barplot(marg.probs[(tot - count + 1):tot], horiz = T, xlab = "Marginal probability", ylab = "Feature") + text((max(marg.probs[(tot - count + 1):tot]) / 2), y, feats.strings[(tot - count + 1):tot]) +} + +#' Plot a mjmcmc_parallel run +#' @export +plot.mjmcmc_parallel <- function (x) { + merged <- merge.mjmcmc_parallel(x) + marg.prob.plot(merged$features, merged$marg.probs) +} + +merge.mjmcmc_parallel <- function (x) { + run.weights <- run.weigths(x) + marg.probs <- x[[1]]$marg.probs * run.weights[1] + for (i in seq_along(x[-1])) { + marg.probs <- marg.probs + x[[i]]$marg.probs * run.weights[i] + } + return(structure( + list( + features = sapply(x[[1]]$populations, print), + marg.probs = marg.probs, + results = x + ), + class = "mjmcmc_merged" + )) +} + +run.weigths <- function (results) { + best.crits <- sapply(results, function (x) x$best.crit) + max.crit <- max(best.crits) + return(exp(best.crits - max.crit) / sum(exp(best.crits - max.crit))) +} + +#' Plot a gmjmcmc_merged run +#' @export +plot.gmjmcmc_merged <- function (x) { + marg.prob.plot(sapply(x$features, print), x$marg.probs) } diff --git a/man/gmjmcmc.parallel.Rd b/man/gmjmcmc.parallel.Rd index 350fbda..9678d1e 100644 --- a/man/gmjmcmc.parallel.Rd +++ b/man/gmjmcmc.parallel.Rd @@ -4,7 +4,16 @@ \alias{gmjmcmc.parallel} \title{Run multiple gmjmcmc runs in parallel returning a list of all results.} \usage{ -gmjmcmc.parallel(runs, cores = getOption("mc.cores", 2L), ...) +gmjmcmc.parallel( + runs, + cores = getOption("mc.cores", 2L), + merge.options = NULL, + data, + loglik.pi, + loglik.alpha, + transforms, + ... +) } \arguments{ \item{runs}{The number of runs to run} diff --git a/man/merge.results.Rd b/man/merge.results.Rd index 66db51b..ad90c59 100644 --- a/man/merge.results.Rd +++ b/man/merge.results.Rd @@ -6,7 +6,13 @@ This function will weight the features based on the best mlik in that population and merge the results together, simplifying by merging equivalent features (having high correlation).} \usage{ -\method{merge}{results}(results, populations = "last", complex.measure = 1, tol = 0, data = NULL) +\method{merge}{results}( + results, + populations = NULL, + complex.measure = NULL, + tol = NULL, + data = NULL +) } \arguments{ \item{results}{A list containing multiple results from GMJMCMC.} diff --git a/man/plot.gmjmcmc.Rd b/man/plot.gmjmcmc.Rd new file mode 100644 index 0000000..fd71410 --- /dev/null +++ b/man/plot.gmjmcmc.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/results.R +\name{plot.gmjmcmc} +\alias{plot.gmjmcmc} +\title{Function to plot the results, works both for results from gmjmcmc and +merged results from merge.results} +\usage{ +\method{plot}{gmjmcmc}(results, count = "all", pop = "last") +} +\arguments{ +\item{results}{The results to use} + +\item{count}{The number of features to plot, defaults to all} + +\item{pop}{The population to plot, defaults to last} +} +\description{ +Function to plot the results, works both for results from gmjmcmc and +merged results from merge.results +} diff --git a/man/plot.gmjmcmc_merged.Rd b/man/plot.gmjmcmc_merged.Rd new file mode 100644 index 0000000..7b37a8d --- /dev/null +++ b/man/plot.gmjmcmc_merged.Rd @@ -0,0 +1,11 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/results.R +\name{plot.gmjmcmc_merged} +\alias{plot.gmjmcmc_merged} +\title{Plot a gmjmcmc_merged run} +\usage{ +\method{plot}{gmjmcmc_merged}(x) +} +\description{ +Plot a gmjmcmc_merged run +} diff --git a/man/plot.mjmcmc.Rd b/man/plot.mjmcmc.Rd new file mode 100644 index 0000000..cd18be0 --- /dev/null +++ b/man/plot.mjmcmc.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/results.R +\name{plot.mjmcmc} +\alias{plot.mjmcmc} +\title{Function to plot the results, works both for results from gmjmcmc and +merged results from merge.results} +\usage{ +\method{plot}{mjmcmc}(results, count = "all") +} +\arguments{ +\item{results}{The results to use} + +\item{count}{The number of features to plot, defaults to all} +} +\description{ +Function to plot the results, works both for results from gmjmcmc and +merged results from merge.results +} diff --git a/man/plot.mjmcmc_parallel.Rd b/man/plot.mjmcmc_parallel.Rd new file mode 100644 index 0000000..51c3490 --- /dev/null +++ b/man/plot.mjmcmc_parallel.Rd @@ -0,0 +1,11 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/results.R +\name{plot.mjmcmc_parallel} +\alias{plot.mjmcmc_parallel} +\title{Plot a mjmcmc_parallel run} +\usage{ +\method{plot}{mjmcmc_parallel}(x) +} +\description{ +Plot a mjmcmc_parallel run +} diff --git a/man/predict.gmjmcmc.2.Rd b/man/predict.gmjmcmc.2.Rd new file mode 100644 index 0000000..8297e3c --- /dev/null +++ b/man/predict.gmjmcmc.2.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/predict.R +\name{predict.gmjmcmc.2} +\alias{predict.gmjmcmc.2} +\title{New idea for a more streamlined function... +Produces slightly different results from the fun above since this is using all lo.models too.} +\usage{ +\method{predict}{gmjmcmc.2}( + model, + x, + link = function(x) x, + quantiles = c(0.025, 0.5, 0.975), + pop = 1 +) +} +\description{ +New idea for a more streamlined function... +Produces slightly different results from the fun above since this is using all lo.models too. +} diff --git a/man/predict.bgnlm.Rd b/man/predict.gmjmcmc_merged.Rd similarity index 74% rename from man/predict.bgnlm.Rd rename to man/predict.gmjmcmc_merged.Rd index cd788f9..7298062 100644 --- a/man/predict.bgnlm.Rd +++ b/man/predict.gmjmcmc_merged.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/predict.R -\name{predict.bgnlm} -\alias{predict.bgnlm} +\name{predict.gmjmcmc_merged} +\alias{predict.gmjmcmc_merged} \title{Predict using a BGNLM model.} \usage{ -\method{predict}{bgnlm}(model, x, link = function(x) x, quantiles = c(0.025, 0.5, 0.975)) +\method{predict}{gmjmcmc_merged}(model, x, link = function(x) x, quantiles = c(0.025, 0.5, 0.975)) } \arguments{ \item{model}{The model to use.} diff --git a/man/summary.gmjmcmc.Rd b/man/summary.gmjmcmc.Rd new file mode 100644 index 0000000..cb50752 --- /dev/null +++ b/man/summary.gmjmcmc.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/results.R +\name{summary.gmjmcmc} +\alias{summary.gmjmcmc} +\title{Function to print a quick summary of the results} +\usage{ +\method{summary}{gmjmcmc}(results, pop = "last") +} +\arguments{ +\item{results}{The results to use} + +\item{pop}{The population to print for, defaults to last} +} +\description{ +Function to print a quick summary of the results +} diff --git a/man/summary.mjmcmc.Rd b/man/summary.mjmcmc.Rd new file mode 100644 index 0000000..012e418 --- /dev/null +++ b/man/summary.mjmcmc.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/results.R +\name{summary.mjmcmc} +\alias{summary.mjmcmc} +\title{Function to print a quick summary of the results} +\usage{ +\method{summary}{mjmcmc}(results) +} +\arguments{ +\item{results}{The results to use} +} +\description{ +Function to print a quick summary of the results +} diff --git a/man/summary.mjmcmc_parallel.Rd b/man/summary.mjmcmc_parallel.Rd new file mode 100644 index 0000000..4cc0f05 --- /dev/null +++ b/man/summary.mjmcmc_parallel.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/results.R +\name{summary.mjmcmc_parallel} +\alias{summary.mjmcmc_parallel} +\title{Function to print a quick summary of the results} +\usage{ +\method{summary}{mjmcmc_parallel}(results) +} +\arguments{ +\item{results}{The results to use} +} +\description{ +Function to print a quick summary of the results +} diff --git a/tests/testthat/test_mjmcmc.R b/tests/testthat/test_mjmcmc.R index d8fe835..f928725 100644 --- a/tests/testthat/test_mjmcmc.R +++ b/tests/testthat/test_mjmcmc.R @@ -22,18 +22,18 @@ test_that("Testing MJMCMC algorithm", { plot(resm) predm <- predict(resm, cbind(1, data[, -1, drop = FALSE])) - resg <- gmjmcmc(data, loglik.tester, NULL, c("log", "exp")) + resg <- gmjmcmc(data, loglik.tester, NULL, c("logi", "exp")) summary(resg) plot(resg) prediction <- predict(resg, cbind(1, data[, -1, drop = FALSE])) respm <- mjmcmc.parallel(2, 2, data, loglik.tester) - summary(respm) # TODO - plot(respm) # TODO + summary(respm) + plot(respm) pred_pm <- predict(respm, cbind(1, data[, -1, drop = FALSE])) - respg <- gmjmcmc.parallel(2, 2, data, loglik.tester, NULL, c("log", "exp")) - # summary(respg) TODO - # plot(respg) TODO + respg <- gmjmcmc.parallel(2, 2, NULL, data, loglik.tester, NULL, c("logi", "exp")) + summary(respg) + plot(respg) pred_pg <- predict(respg, cbind(1, data[, -1, drop = FALSE])) }) -- GitLab From 31bdf8a7c84f99e85c6901af05941d0be1d2578e Mon Sep 17 00:00:00 2001 From: aliaksah Date: Mon, 19 Jun 2023 17:37:59 +0200 Subject: [PATCH 06/24] making cosmetic changes accroding to input from Florian: 1. Only outputting the posteriors for PIP > 0.0001 2. Additing count for plotting in parallel MJMCMC and GMJMCMC 3. Adding the default for r in gaussian loglik --- R/gmjmcmc_support.R | 2 +- R/likelihoods.R | 6 +++++- R/results.R | 18 ++++++++++++------ 3 files changed, 18 insertions(+), 8 deletions(-) diff --git a/R/gmjmcmc_support.R b/R/gmjmcmc_support.R index e9e5f88..dca529a 100644 --- a/R/gmjmcmc_support.R +++ b/R/gmjmcmc_support.R @@ -89,7 +89,7 @@ precalc.features <- function (data, features) { # TODO: Compare to previous mliks here instead, also add a flag to do that in full likelihood estimation scenarios. # Function to call the model function -loglik.pre <- function (loglik.pi, model, complex, data, params) { +loglik.pre <- function (loglik.pi, model, complex, data, params = NULL) { # Get the complexity measures for just this model complex <- list(width = complex$width[model], oc = complex$oc[model], depth = complex$depth[model]) # Call the model estimator with the data and the model, note that we add the intercept to every model diff --git a/R/likelihoods.R b/R/likelihoods.R index 1491051..3b8099d 100644 --- a/R/likelihoods.R +++ b/R/likelihoods.R @@ -43,7 +43,11 @@ logistic.loglik.alpha <- function (a, data, mu_func) { #' @param params A list of parameters for the log likelihood, supplied by the user #' #' @export gaussian.loglik -gaussian.loglik <- function (y, x, model, complex, params = list(r = 1)) { +gaussian.loglik <- function (y, x, model, complex, params) { + + if(length(params) == 0) + params = list(r = 1/dim(x)[1]) + suppressWarnings({mod <- fastglm(as.matrix(x[, model]), y, family = gaussian())}) ret <- (-(mod$deviance + 2 * log(length(y)) * (mod$rank - 1) - 2 * log(params$r) * (sum(complex$oc)))) / 2 return(list(crit=ret, coefs=mod$coefficients)) diff --git a/R/results.R b/R/results.R index dc5c614..7c44848 100644 --- a/R/results.R +++ b/R/results.R @@ -177,8 +177,14 @@ summary_internal <- function (best, feats.strings, marg.probs) { # Print the best marginal likelihood cat("\nBest marginal likelihood: ", best, "\n") - ord.marg <- order(marg.probs[1, ], decreasing = T) - return(data.frame(feats.strings = feats.strings[ord.marg], marg.probs = marg.probs[1, ord.marg])) + keep <- which(marg.probs[1, ] > 0.0001) + feats.strings <- feats.strings[keep] + marg.probs <- marg.probs[1,keep] + + ord.marg <- order(marg.probs, decreasing = T) + + + return(data.frame(feats.strings = feats.strings[ord.marg], marg.probs = marg.probs[ord.marg])) } #' Function to plot the results, works both for results from gmjmcmc and @@ -236,9 +242,9 @@ marg.prob.plot <- function (feats.strings, marg.probs, count = "all") { #' Plot a mjmcmc_parallel run #' @export -plot.mjmcmc_parallel <- function (x) { +plot.mjmcmc_parallel <- function (x, count = "all") { merged <- merge.mjmcmc_parallel(x) - marg.prob.plot(merged$features, merged$marg.probs) + marg.prob.plot(merged$features, merged$marg.probs, count) } merge.mjmcmc_parallel <- function (x) { @@ -265,6 +271,6 @@ run.weigths <- function (results) { #' Plot a gmjmcmc_merged run #' @export -plot.gmjmcmc_merged <- function (x) { - marg.prob.plot(sapply(x$features, print), x$marg.probs) +plot.gmjmcmc_merged <- function (x, count = "all") { + marg.prob.plot(sapply(x$features, print), x$marg.probs, count = count) } -- GitLab From 2a0bbd387c779040387a169e4c59fa4fe27111c9 Mon Sep 17 00:00:00 2001 From: aliaksah Date: Mon, 19 Jun 2023 17:52:06 +0200 Subject: [PATCH 07/24] export gen.probs.mjmcmc properly fixed --- R/arguments.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/arguments.R b/R/arguments.R index 5200863..ee6b4c3 100644 --- a/R/arguments.R +++ b/R/arguments.R @@ -5,7 +5,7 @@ #' Generate a probability list for MJMCMC #' -#' @export gen.probs.gmjmcmc +#' @export gen.probs.mjmcmc gen.probs.mjmcmc <- function () { ## Mode jumping algorithm probabilities large <- 0.05 # probability of a large jump -- GitLab From 321e2d768330de8eba850c2ae06da355ed410b33 Mon Sep 17 00:00:00 2001 From: aliaksah Date: Tue, 20 Jun 2023 11:30:19 +0200 Subject: [PATCH 08/24] added default for merge in parallel GMJMCMC --- R/parallel.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/parallel.R b/R/parallel.R index f49d2fd..c318c67 100644 --- a/R/parallel.R +++ b/R/parallel.R @@ -17,7 +17,7 @@ mjmcmc.parallel <- function (runs, cores = getOption("mc.cores", 2L), ...) { #' @param ... Parameters to pass to gmjmcmc #' @return Results from multiple gmjmcmc runs #' @export -gmjmcmc.parallel <- function (runs, cores = getOption("mc.cores", 2L), merge.options = NULL, data, loglik.pi, loglik.alpha, transforms, ...) { +gmjmcmc.parallel <- function (runs, cores = getOption("mc.cores", 2L), merge.options = list(populations = "best", complex.measure = 1, tol = 0), data, loglik.pi, loglik.alpha, transforms, ...) { options("gmjmcmc-transformations" = transforms) results <- mclapply(seq_len(runs), function (x) { gmjmcmc(data = data, loglik.pi = loglik.pi, loglik.alpha = loglik.alpha, transforms = transforms, ...) -- GitLab From 37ee214dcb64cd25b0b445127e0c5fb9a9281b98 Mon Sep 17 00:00:00 2001 From: aliaksah Date: Tue, 20 Jun 2023 14:58:29 +0200 Subject: [PATCH 09/24] added a few nonlinear functions for potenital Gs --- R/arguments.R | 2 +- R/nonlinear_functions.R | 146 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 146 insertions(+), 2 deletions(-) diff --git a/R/arguments.R b/R/arguments.R index ee6b4c3..57513e6 100644 --- a/R/arguments.R +++ b/R/arguments.R @@ -104,7 +104,7 @@ gen.params.gmjmcmc <- function (data) { ncov <- ncol(data) - 2 feat_params <- list(D = 5, L = 15, # Hard limits on feature complexity - alpha = 0, # alpha strategy (0=None, 1,2,3=strategies as per Hubin et al.) TODO: Fully Bayesian + alpha = 0, # alpha strategy (0 = None, 1,2,3 = strategies as per Hubin et al.) TODO: Fully Bayesian pop.max = as.integer(ncov * 1.5), # Max features population size keep.org = F, # Always keep original covariates in every population prel.filter = 0, # Filtration threshold for first population (i.e. filter covariates even if keep.org=T) diff --git a/R/nonlinear_functions.R b/R/nonlinear_functions.R index de0ec51..a508b73 100644 --- a/R/nonlinear_functions.R +++ b/R/nonlinear_functions.R @@ -96,7 +96,7 @@ to25 <- function(x)abs(x)^(2.5) #' @param x The vector of values #' @return x^(3) #' -#' @export to35 +#' @export to3 to3 <- function(x)abs(x)^(3) #' To 3.5 power @@ -106,3 +106,147 @@ to3 <- function(x)abs(x)^(3) #' #' @export to35 to35 <- function(x)abs(x)^(3.5) + +#' p0 polynomial term +#' +#' @param x The vector of values +#' @return log(abs(x) + 0.00001) +#' +#' @export p0 +p0 <- function(x) log(abs(x)+0.00001) + +#' pm1 polynomial term +#' +#' @param x The vector of values +#' @return (x+0.00001)^(-1) +#' +#' @export pm1 +pm1 <- function(x) (x+0.00001)^(-1) + +#' pm2 polynomial term +#' +#' @param x The vector of values +#' @return (x+0.00001)^(-2) +#' +#' @export pm2 +pm2 <- function(x) (x+0.00001)^(-2) + +#' pm05 polynomial term +#' +#' @param x The vector of values +#' @return (abs(x)+0.00001)^(-0.5) +#' +#' @export pm05 +pm05 <- function(x) (abs(x)+0.00001)^(-0.5) + +#' p05 polynomial term +#' +#' @param x The vector of values +#' @return (abs(x)+0.00001)^(0.5) +#' +#' @export p05 +p05 <- function(x) (abs(x)+0.00001)^(0.5) + +#' p2 polynomial term +#' +#' @param x The vector of values +#' @return x^(2) +#' +#' @export p2 +p2 <- function(x) x^(2) + +#' p3 polynomial term +#' +#' @param x The vector of values +#' @return x^(3) +#' +#' @export p3 +p3 <- function(x) x^(3) + +#' p0p0 polynomial term +#' +#' @param x The vector of values +#' @return p0(x)*p0(x) +#' +#' @export p0p0 +p0p0 <- function(x) p0(x)*p0(x) + +#' p0pm1 polynomial terms +#' +#' @param x The vector of values +#' @return p0(x)*(x+0.00001)^(-1) +#' +#' @export p0pm1 +p0pm1 <- function(x) p0(x)*(x+0.00001)^(-1) + +#' p0pm2 polynomial term +#' +#' @param x The vector of values +#' @return p0(x)*(x+0.00001)^(-2) +#' +#' @export p0pm2 +p0pm2 <- function(x) p0(x)*(x+0.00001)^(-2) + +#' p0pm05 polynomial term +#' +#' @param x The vector of values +#' @return p0(x)*(abs(x)+0.00001)^(-0.5) +#' +#' @export p0pm05 +p0pm05 <- function(x) p0(x)*(abs(x)+0.00001)^(-0.5) + +#' p0p05 polynomial term +#' +#' @param x The vector of values +#' @return p0(x)*(abs(x)+0.00001)^(0.5) +#' +#' @export p0p05 +p0p05 <- function(x) p0(x)*(abs(x)+0.00001)^(0.5) + +#' p0p1 polynomial term +#' +#' @param x The vector of values +#' @return p0(x)*x +#' +#' @export p0p1 +p0p1 <- function(x) p0(x)*x + +#' p0p2 polynomial term +#' +#' @param x The vector of values +#' @return p0(x)*x^(2) +#' +#' @export p0p2 +p0p2 <- function(x) p0(x)*x^(2) + +#' p0p3 polynomial term +#' +#' @param x The vector of values +#' @return p0(x)*x^(3) +#' +#' @export p0p3 +p0p3 <- function(x) p0(x)*x^(3) + + +#' ReLu function +#' +#' @param x The vector of values +#' @return max(x,0) +#' +#' @export relu +relu <- function(x) max(x,0) + + + + +#' erf function +#' +#' @param x The vector of values +#' @return 2 * pnorm(x * sqrt(2)) - 1 +#' +#' @export erf +erf <- function(x) 2 * pnorm(x * sqrt(2)) - 1 + + + + -- GitLab From 9741c66a33aa493e4c56ad9f7f96c3d84015e8af Mon Sep 17 00:00:00 2001 From: aliaksah Date: Tue, 20 Jun 2023 15:31:36 +0200 Subject: [PATCH 10/24] tolerance added --- R/results.R | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/R/results.R b/R/results.R index 7c44848..aa18a8a 100644 --- a/R/results.R +++ b/R/results.R @@ -134,16 +134,16 @@ model.string <- function (model, features, link) { #' @param pop The population to print for, defaults to last #' #' @export -summary.gmjmcmc <- function (results, pop = "last") { +summary.gmjmcmc <- function (results, pop = "last", tol = 0.0001) { if (pop == "last") pop <- length(results$models) - summary.mjmcmc(list(best = results$best, models = results$models[[pop]], populations = results$populations[[pop]])) + summary.mjmcmc(list(best = results$best, models = results$models[[pop]], populations = results$populations[[pop]]),tol = tol) } #' @export -summary.gmjmcmc_merged <- function (x) { +summary.gmjmcmc_merged <- function (x, tol = 0.0001) { best <- max(sapply(x$results, function (y) y$best)) feats.strings <- sapply(x$features, print) - summary_internal(best, feats.strings, x$marg.probs) + summary_internal(best, feats.strings, x$marg.probs, tol = tol) } #' Function to print a quick summary of the results @@ -151,8 +151,8 @@ summary.gmjmcmc_merged <- function (x) { #' @param results The results to use #' #' @export -summary.mjmcmc <- function (results) { - return(summary.mjmcmc_parallel(list(results))) +summary.mjmcmc <- function (results, tol = 0.0001) { + return(summary.mjmcmc_parallel(list(results),tol = tol)) } #' Function to print a quick summary of the results @@ -160,24 +160,25 @@ summary.mjmcmc <- function (results) { #' @param results The results to use #' #' @export -summary.mjmcmc_parallel <- function (results) { +summary.mjmcmc_parallel <- function (results , tol = 0.0001) { # Get features as strings for printing feats.strings <- sapply(results[[1]]$populations, print.feature, round = 2) # Get marginal posterior of features models <- unlist(lapply(results, function (x) x$models), recursive = FALSE) marg.probs <- marginal.probs.renorm(models)$probs best <- max(sapply(results, function (x) x$best)) - return(summary_internal(best, feats.strings, marg.probs)) + return(summary_internal(best, feats.strings, marg.probs, tol = tol)) } -summary_internal <- function (best, feats.strings, marg.probs) { +summary_internal <- function (best, feats.strings, marg.probs, tol = 0.0001) { # Print the final distribution + keep <- which(marg.probs[1, ] > tol) cat(" Importance | Feature\n") - print.dist(marg.probs, feats.strings, -1) + print.dist(marg.probs[keep], feats.strings[keep], -1) # Print the best marginal likelihood cat("\nBest marginal likelihood: ", best, "\n") - - keep <- which(marg.probs[1, ] > 0.0001) + + feats.strings <- feats.strings[keep] marg.probs <- marg.probs[1,keep] -- GitLab From 670b495ec29171125cf520ef4c1339c4da82e78a Mon Sep 17 00:00:00 2001 From: aliaksah Date: Wed, 21 Jun 2023 09:43:53 +0200 Subject: [PATCH 11/24] changing the default alpha strategy to 1 and reducing the probability of projections to 0.1 by default --- R/arguments.R | 4 ++-- R/gmjmcmc.R | 7 +++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/R/arguments.R b/R/arguments.R index 57513e6..6ae9f2d 100644 --- a/R/arguments.R +++ b/R/arguments.R @@ -36,7 +36,7 @@ gen.probs.gmjmcmc <- function (transforms) { ## Feature generation probabilities transcount <- length(transforms) filter <- 0.6 # filtration threshold - gen <- rep(1 / 4, 4) # probability for different feature generation methods + gen <- c(0.30,0.30,0.10,0.30) # probability for different feature generation methods trans <- rep(1 / transcount, transcount) # probability for each different nonlinear transformation probs$filter <- filter @@ -104,7 +104,7 @@ gen.params.gmjmcmc <- function (data) { ncov <- ncol(data) - 2 feat_params <- list(D = 5, L = 15, # Hard limits on feature complexity - alpha = 0, # alpha strategy (0 = None, 1,2,3 = strategies as per Hubin et al.) TODO: Fully Bayesian + alpha = 1, # alpha strategy (0 = None, 1,2,3 = strategies as per Hubin et al.) TODO: Fully Bayesian pop.max = as.integer(ncov * 1.5), # Max features population size keep.org = F, # Always keep original covariates in every population prel.filter = 0, # Filtration threshold for first population (i.e. filter covariates even if keep.org=T) diff --git a/R/gmjmcmc.R b/R/gmjmcmc.R index 5cacd0a..c3f22e2 100644 --- a/R/gmjmcmc.R +++ b/R/gmjmcmc.R @@ -16,8 +16,11 @@ NULL #' and the rest of the columns should be the independent variables. #' @param loglik.pi The (log) density to explore #' @param loglik.alpha The likelihood function to use for alpha calculation -#' @param transforms A list of the available nonlinear transformations for feature generation -#' @param P The number of population iterations +#' @param transforms A Character vector including the names of the non-linear functions to be used by the modification +#' and the projection operator. +#' @param P The number of generations for GMJMCMC. +#' The default value is $P = 10$. +#' A larger value like $P = 50$ might be more realistic for more complicated examples where one expects a lot of non-linear structures. #' @param N.init The number of iterations per population (total iterations = (T-1)*N.init+N.final) #' @param N.final The number of iterations for the final population (total iterations = (T-1)*N.init+N.final) #' @param probs A list of the various probability vectors to use -- GitLab From 66b0bc06ffbd4814c4e07ecff5b62e11b4775979 Mon Sep 17 00:00:00 2001 From: aliaksah Date: Wed, 21 Jun 2023 09:45:54 +0200 Subject: [PATCH 12/24] changing the default alpha strategy to 3 --- R/arguments.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/arguments.R b/R/arguments.R index 6ae9f2d..41a82ad 100644 --- a/R/arguments.R +++ b/R/arguments.R @@ -104,7 +104,7 @@ gen.params.gmjmcmc <- function (data) { ncov <- ncol(data) - 2 feat_params <- list(D = 5, L = 15, # Hard limits on feature complexity - alpha = 1, # alpha strategy (0 = None, 1,2,3 = strategies as per Hubin et al.) TODO: Fully Bayesian + alpha = 3, # alpha strategy (0 = None, 1,2,3 = strategies as per Hubin et al.) TODO: Fully Bayesian pop.max = as.integer(ncov * 1.5), # Max features population size keep.org = F, # Always keep original covariates in every population prel.filter = 0, # Filtration threshold for first population (i.e. filter covariates even if keep.org=T) -- GitLab From b9b79effff44033bc17eb3cabff02b07d4e39cd0 Mon Sep 17 00:00:00 2001 From: aliaksah Date: Wed, 21 Jun 2023 09:52:15 +0200 Subject: [PATCH 13/24] adding default estimator functions to GMJMCMC --- R/gmjmcmc.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/gmjmcmc.R b/R/gmjmcmc.R index c3f22e2..3c68ad8 100644 --- a/R/gmjmcmc.R +++ b/R/gmjmcmc.R @@ -28,7 +28,7 @@ NULL #' @param sub An indicator that if the likelihood is inexact and should be improved each model visit (EXPERIMENTAL!) #' #' @export gmjmcmc -gmjmcmc <- function (data, loglik.pi, loglik.alpha, transforms, P = 10, N.init = 100, N.final = 100, probs = NULL, params = NULL, sub = FALSE) { +gmjmcmc <- function (data, loglik.pi = gaussian.loglik, loglik.alpha = gaussian.loglik.alpha, transforms, P = 10, N.init = 100, N.final = 100, probs = NULL, params = NULL, sub = FALSE) { # Verify that the data is well-formed data <- check.data(data) -- GitLab From dab6291609a359d271eeb01af0105bd58869f480 Mon Sep 17 00:00:00 2001 From: aliaksah Date: Wed, 21 Jun 2023 09:56:37 +0200 Subject: [PATCH 14/24] adding the default gaussian estimators to GMJMCMC parallel --- R/parallel.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/parallel.R b/R/parallel.R index c318c67..ab09ffa 100644 --- a/R/parallel.R +++ b/R/parallel.R @@ -17,7 +17,7 @@ mjmcmc.parallel <- function (runs, cores = getOption("mc.cores", 2L), ...) { #' @param ... Parameters to pass to gmjmcmc #' @return Results from multiple gmjmcmc runs #' @export -gmjmcmc.parallel <- function (runs, cores = getOption("mc.cores", 2L), merge.options = list(populations = "best", complex.measure = 1, tol = 0), data, loglik.pi, loglik.alpha, transforms, ...) { +gmjmcmc.parallel <- function (runs, cores = getOption("mc.cores", 2L), merge.options = list(populations = "best", complex.measure = 1, tol = 0), data, loglik.pi = gaussian.loglik, loglik.alpha = gaussian.loglik.alpha(), transforms, ...) { options("gmjmcmc-transformations" = transforms) results <- mclapply(seq_len(runs), function (x) { gmjmcmc(data = data, loglik.pi = loglik.pi, loglik.alpha = loglik.alpha, transforms = transforms, ...) -- GitLab From 1c0a7e131097d4360cffdbf186cd6db6d97ab7c4 Mon Sep 17 00:00:00 2001 From: aliaksah Date: Wed, 21 Jun 2023 10:33:03 +0200 Subject: [PATCH 15/24] switching back to alpha = 0 due to much faster speed deisred for the default presentation --- R/arguments.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/arguments.R b/R/arguments.R index 41a82ad..7913198 100644 --- a/R/arguments.R +++ b/R/arguments.R @@ -104,7 +104,7 @@ gen.params.gmjmcmc <- function (data) { ncov <- ncol(data) - 2 feat_params <- list(D = 5, L = 15, # Hard limits on feature complexity - alpha = 3, # alpha strategy (0 = None, 1,2,3 = strategies as per Hubin et al.) TODO: Fully Bayesian + alpha = 0, # alpha strategy (0 = None, 1,2,3 = strategies as per Hubin et al.) TODO: Fully Bayesian pop.max = as.integer(ncov * 1.5), # Max features population size keep.org = F, # Always keep original covariates in every population prel.filter = 0, # Filtration threshold for first population (i.e. filter covariates even if keep.org=T) -- GitLab From 80a0d43ca14ec036ba72ef93350785f4022b960a Mon Sep 17 00:00:00 2001 From: aliaksah Date: Wed, 21 Jun 2023 12:53:24 +0200 Subject: [PATCH 16/24] added a few nonlinear functions: -nrelu -hs -nhs -gelu -ngelu changed names for sini to sin.rad cose to cos.rad --- R/arguments.R | 2 +- R/nonlinear_functions.R | 111 +++++++++++++++++++++++++--------------- 2 files changed, 72 insertions(+), 41 deletions(-) diff --git a/R/arguments.R b/R/arguments.R index 7913198..ae0b597 100644 --- a/R/arguments.R +++ b/R/arguments.R @@ -36,7 +36,7 @@ gen.probs.gmjmcmc <- function (transforms) { ## Feature generation probabilities transcount <- length(transforms) filter <- 0.6 # filtration threshold - gen <- c(0.30,0.30,0.10,0.30) # probability for different feature generation methods + gen <- c(0.40,0.40,0.10,0.10) # probability for different feature generation methods trans <- rep(1 / transcount, transcount) # probability for each different nonlinear transformation probs$filter <- filter diff --git a/R/nonlinear_functions.R b/R/nonlinear_functions.R index a508b73..7d7174b 100644 --- a/R/nonlinear_functions.R +++ b/R/nonlinear_functions.R @@ -9,39 +9,32 @@ #' @return The sigmoid of x #' #' @export sigmoid -sigmoid <- function(x) exp(-x) +sigmoid <- function(x) 1 / (1 - exp(-x)) #' Sine function for degrees #' #' @param x The vector of values in degrees #' @return The sine of x #' -#' @export sini -sini <- function(x) sin(x/180*pi) +#' @export sin.rad +sin.rad <- function(x) sin(x/180*pi) #' Cosine function for degrees #' #' @param x The vector of values in degrees #' @return The cosine of x #' -#' @export cosi -cosi <- function(x) cos(x/180*pi) +#' @export cos.rad +cos.rad <- function(x) cos(x/180*pi) -#' Exponential function of absolute values +#' Double exponential function #' #' @param x The vector of values #' @return e^(-abs(x)) #' -#' @export expi -expi <- function(x) exp(-abs(x)) +#' @export exp.dbl +exp.dbl <- function(x) exp(-abs(x)) -#' Log function of absolute values -#' -#' @param x The vector of values -#' @return log(abs(x)) -#' -#' @export logi -logi <- function(x) log(abs(x)+.Machine$double.eps) #' Square root function #' @@ -91,13 +84,6 @@ gauss <- function(x) exp(-x*x) #' @export to25 to25 <- function(x)abs(x)^(2.5) -#' To 3 power -#' -#' @param x The vector of values -#' @return x^(3) -#' -#' @export to3 -to3 <- function(x)abs(x)^(3) #' To 3.5 power #' @@ -110,42 +96,42 @@ to35 <- function(x)abs(x)^(3.5) #' p0 polynomial term #' #' @param x The vector of values -#' @return log(abs(x) + 0.00001) +#' @return log(abs(x) + .Machine$double.eps) #' #' @export p0 -p0 <- function(x) log(abs(x)+0.00001) +p0 <- function(x) log(abs(x)+.Machine$double.eps) #' pm1 polynomial term #' #' @param x The vector of values -#' @return (x+0.00001)^(-1) +#' @return sign(x)*(abs(x)+.Machine$double.eps)^(-1) #' #' @export pm1 -pm1 <- function(x) (x+0.00001)^(-1) +pm1 <- function(x) sign(x)*(abs(x)+.Machine$double.eps)^(-1) #' pm2 polynomial term #' #' @param x The vector of values -#' @return (x+0.00001)^(-2) +#' @return sign(x)*(abs(x)+.Machine$double.eps)^(-2) #' #' @export pm2 -pm2 <- function(x) (x+0.00001)^(-2) +pm2 <- function(x) sign(x)*(abs(x)+.Machine$double.eps)^(-2) #' pm05 polynomial term #' #' @param x The vector of values -#' @return (abs(x)+0.00001)^(-0.5) +#' @return (abs(x)+.Machine$double.eps)^(-0.5) #' #' @export pm05 -pm05 <- function(x) (abs(x)+0.00001)^(-0.5) +pm05 <- function(x) (abs(x)+.Machine$double.eps)^(-0.5) #' p05 polynomial term #' #' @param x The vector of values -#' @return (abs(x)+0.00001)^(0.5) +#' @return (abs(x)+.Machine$double.eps)^(0.5) #' #' @export p05 -p05 <- function(x) (abs(x)+0.00001)^(0.5) +p05 <- function(x) (abs(x)+.Machine$double.eps)^(0.5) #' p2 polynomial term #' @@ -174,34 +160,34 @@ p0p0 <- function(x) p0(x)*p0(x) #' p0pm1 polynomial terms #' #' @param x The vector of values -#' @return p0(x)*(x+0.00001)^(-1) +#' @return p0(x)*(x+.Machine$double.eps)^(-1) #' #' @export p0pm1 -p0pm1 <- function(x) p0(x)*(x+0.00001)^(-1) +p0pm1 <- function(x) p0(x)*(x+.Machine$double.eps)^(-1) #' p0pm2 polynomial term #' #' @param x The vector of values -#' @return p0(x)*(x+0.00001)^(-2) +#' @return p0(x)*sign(x)*(abs(x)+.Machine$double.eps)^(-2) #' #' @export p0pm2 -p0pm2 <- function(x) p0(x)*(x+0.00001)^(-2) +p0pm2 <- function(x) p0(x)*sign(x)*(abs(x)+.Machine$double.eps)^(-2) #' p0pm05 polynomial term #' #' @param x The vector of values -#' @return p0(x)*(abs(x)+0.00001)^(-0.5) +#' @return p0(x)*sign(x)*(abs(x)+.Machine$double.eps)^(-0.5) #' #' @export p0pm05 -p0pm05 <- function(x) p0(x)*(abs(x)+0.00001)^(-0.5) +p0pm05 <- function(x) p0(x)*(abs(x)+.Machine$double.eps)^(-0.5) #' p0p05 polynomial term #' #' @param x The vector of values -#' @return p0(x)*(abs(x)+0.00001)^(0.5) +#' @return p0(x)*(abs(x)+.Machine$double.eps)^(0.5) #' #' @export p0p05 -p0p05 <- function(x) p0(x)*(abs(x)+0.00001)^(0.5) +p0p05 <- function(x) p0(x)*(abs(x)+.Machine$double.eps)^(0.5) #' p0p1 polynomial term #' @@ -236,8 +222,39 @@ p0p3 <- function(x) p0(x)*x^(3) #' @export relu relu <- function(x) max(x,0) +#' negative ReLu function +#' +#' @param x The vector of values +#' @return max(x,0) +#' +#' @export nrelu +nrelu <- function(x) max(x,0) +#' negative ReLu function +#' +#' @param x The vector of values +#' @return max(-x,0) +#' +#' @export nrelu +nrelu <- function(x) max(-x,0) + +#' GELU function +#' +#' @param x The vector of values +#' @return x*pnorm(x) +#' +#' @export gelu +gelu <- function(x)x *pnorm(x) + + +#' Negative GELU function +#' +#' @param x The vector of values +#' @return -x*pnorm(-x) +#' +#' @export ngelu +ngelu <- function(x) -x*pnorm(-x) #' erf function #' @@ -248,5 +265,19 @@ relu <- function(x) max(x,0) erf <- function(x) 2 * pnorm(x * sqrt(2)) - 1 +#' heavy side function +#' +#' @param x The vector of values +#' @return as.integer(x>0) +#' +#' @export hs +hs <- function(x) as.integer(x>0) +#' negative heavy side function +#' +#' @param x The vector of values +#' @return as.integer(x<0) +#' +#' @export nhs +nhs <- function(x) as.integer(x<0) -- GitLab From decea6b23f99601aad5ec4e07a10b83281f65469 Mon Sep 17 00:00:00 2001 From: aliaksah Date: Wed, 21 Jun 2023 16:24:26 +0200 Subject: [PATCH 17/24] update documentations --- NAMESPACE | 30 ++++++++++++++++++++++++++---- man/cosi.Rd | 17 ----------------- man/expi.Rd | 17 ----------------- man/gaussian.loglik.Rd | 2 +- man/gmjmcmc.Rd | 11 +++++++---- man/gmjmcmc.parallel.Rd | 6 +++--- man/logi.Rd | 17 ----------------- man/sini.Rd | 17 ----------------- man/summary.gmjmcmc.Rd | 2 +- man/summary.mjmcmc.Rd | 2 +- man/summary.mjmcmc_parallel.Rd | 2 +- man/to3.Rd | 17 ----------------- 12 files changed, 40 insertions(+), 100 deletions(-) delete mode 100644 man/cosi.Rd delete mode 100644 man/expi.Rd delete mode 100644 man/logi.Rd delete mode 100644 man/sini.Rd delete mode 100644 man/to3.Rd diff --git a/NAMESPACE b/NAMESPACE index fb936b3..ac2b264 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -14,20 +14,23 @@ S3method(summary,gmjmcmc) S3method(summary,gmjmcmc_merged) S3method(summary,mjmcmc) S3method(summary,mjmcmc_parallel) -export(cosi) -export(expi) +export(cos.rad) +export(erf) +export(exp.dbl) export(gauss) export(gaussian.loglik) export(gaussian.loglik.alpha) export(gaussian.loglik.bic.irlssgd) +export(gelu) export(gen.params.gmjmcmc) export(gen.params.mjmcmc) export(gen.probs.gmjmcmc) +export(gen.probs.mjmcmc) export(gmjmcmc) export(gmjmcmc.iact) export(gmjmcmc.parallel) +export(hs) export(linear.g.prior.loglik) -export(logi) export(logistic.loglik) export(logistic.loglik.alpha) export(marginal.probs) @@ -35,9 +38,28 @@ export(merge.results) export(mjmcmc) export(mjmcmc.parallel) export(model.string) +export(ngelu) +export(nhs) +export(nrelu) +export(p0) +export(p05) +export(p0p0) +export(p0p05) +export(p0p1) +export(p0p2) +export(p0p3) +export(p0pm05) +export(p0pm1) +export(p0pm2) +export(p2) +export(p3) +export(pm05) +export(pm1) +export(pm2) +export(relu) export(set.transforms) export(sigmoid) -export(sini) +export(sin.rad) export(sqroot) export(to25) export(to35) diff --git a/man/cosi.Rd b/man/cosi.Rd deleted file mode 100644 index abe43f8..0000000 --- a/man/cosi.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/nonlinear_functions.R -\name{cosi} -\alias{cosi} -\title{Cosine function for degrees} -\usage{ -cosi(x) -} -\arguments{ -\item{x}{The vector of values in degrees} -} -\value{ -The cosine of x -} -\description{ -Cosine function for degrees -} diff --git a/man/expi.Rd b/man/expi.Rd deleted file mode 100644 index f6fa4d0..0000000 --- a/man/expi.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/nonlinear_functions.R -\name{expi} -\alias{expi} -\title{Exponential function of absolute values} -\usage{ -expi(x) -} -\arguments{ -\item{x}{The vector of values} -} -\value{ -e^(-abs(x)) -} -\description{ -Exponential function of absolute values -} diff --git a/man/gaussian.loglik.Rd b/man/gaussian.loglik.Rd index 18ae582..24ad6f8 100644 --- a/man/gaussian.loglik.Rd +++ b/man/gaussian.loglik.Rd @@ -4,7 +4,7 @@ \alias{gaussian.loglik} \title{Log likelihood function for gaussian regression with a prior p(m)=r*sum(total_width).} \usage{ -gaussian.loglik(y, x, model, complex, params = list(r = 1)) +gaussian.loglik(y, x, model, complex, params) } \arguments{ \item{y}{A vector containing the dependent variable} diff --git a/man/gmjmcmc.Rd b/man/gmjmcmc.Rd index 4c913d8..a45cf98 100644 --- a/man/gmjmcmc.Rd +++ b/man/gmjmcmc.Rd @@ -7,8 +7,8 @@ TODO: More documentation - borrow from https://github.com/aliaksah/EMJMCMC2016/b \usage{ gmjmcmc( data, - loglik.pi, - loglik.alpha, + loglik.pi = gaussian.loglik, + loglik.alpha = gaussian.loglik.alpha, transforms, P = 10, N.init = 100, @@ -27,9 +27,12 @@ and the rest of the columns should be the independent variables.} \item{loglik.alpha}{The likelihood function to use for alpha calculation} -\item{transforms}{A list of the available nonlinear transformations for feature generation} +\item{transforms}{A Character vector including the names of the non-linear functions to be used by the modification +and the projection operator.} -\item{P}{The number of population iterations} +\item{P}{The number of generations for GMJMCMC. +The default value is $P = 10$. +A larger value like $P = 50$ might be more realistic for more complicated examples where one expects a lot of non-linear structures.} \item{N.init}{The number of iterations per population (total iterations = (T-1)*N.init+N.final)} diff --git a/man/gmjmcmc.parallel.Rd b/man/gmjmcmc.parallel.Rd index 9678d1e..f4f172a 100644 --- a/man/gmjmcmc.parallel.Rd +++ b/man/gmjmcmc.parallel.Rd @@ -7,10 +7,10 @@ gmjmcmc.parallel( runs, cores = getOption("mc.cores", 2L), - merge.options = NULL, + merge.options = list(populations = "best", complex.measure = 1, tol = 0), data, - loglik.pi, - loglik.alpha, + loglik.pi = gaussian.loglik, + loglik.alpha = gaussian.loglik.alpha(), transforms, ... ) diff --git a/man/logi.Rd b/man/logi.Rd deleted file mode 100644 index 9c8cb92..0000000 --- a/man/logi.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/nonlinear_functions.R -\name{logi} -\alias{logi} -\title{Log function of absolute values} -\usage{ -logi(x) -} -\arguments{ -\item{x}{The vector of values} -} -\value{ -log(abs(x)) -} -\description{ -Log function of absolute values -} diff --git a/man/sini.Rd b/man/sini.Rd deleted file mode 100644 index 37f67bb..0000000 --- a/man/sini.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/nonlinear_functions.R -\name{sini} -\alias{sini} -\title{Sine function for degrees} -\usage{ -sini(x) -} -\arguments{ -\item{x}{The vector of values in degrees} -} -\value{ -The sine of x -} -\description{ -Sine function for degrees -} diff --git a/man/summary.gmjmcmc.Rd b/man/summary.gmjmcmc.Rd index cb50752..2e4d130 100644 --- a/man/summary.gmjmcmc.Rd +++ b/man/summary.gmjmcmc.Rd @@ -4,7 +4,7 @@ \alias{summary.gmjmcmc} \title{Function to print a quick summary of the results} \usage{ -\method{summary}{gmjmcmc}(results, pop = "last") +\method{summary}{gmjmcmc}(results, pop = "last", tol = 1e-04) } \arguments{ \item{results}{The results to use} diff --git a/man/summary.mjmcmc.Rd b/man/summary.mjmcmc.Rd index 012e418..1bb2139 100644 --- a/man/summary.mjmcmc.Rd +++ b/man/summary.mjmcmc.Rd @@ -4,7 +4,7 @@ \alias{summary.mjmcmc} \title{Function to print a quick summary of the results} \usage{ -\method{summary}{mjmcmc}(results) +\method{summary}{mjmcmc}(results, tol = 1e-04) } \arguments{ \item{results}{The results to use} diff --git a/man/summary.mjmcmc_parallel.Rd b/man/summary.mjmcmc_parallel.Rd index 4cc0f05..d005983 100644 --- a/man/summary.mjmcmc_parallel.Rd +++ b/man/summary.mjmcmc_parallel.Rd @@ -4,7 +4,7 @@ \alias{summary.mjmcmc_parallel} \title{Function to print a quick summary of the results} \usage{ -\method{summary}{mjmcmc_parallel}(results) +\method{summary}{mjmcmc_parallel}(results, tol = 1e-04) } \arguments{ \item{results}{The results to use} diff --git a/man/to3.Rd b/man/to3.Rd deleted file mode 100644 index a7a5f86..0000000 --- a/man/to3.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/nonlinear_functions.R -\name{to3} -\alias{to3} -\title{To 3 power} -\usage{ -to3(x) -} -\arguments{ -\item{x}{The vector of values} -} -\value{ -x^(3) -} -\description{ -To 3 power -} -- GitLab From 0e6dfc0fae01de397e6dff41d0bc27dcef7be24c Mon Sep 17 00:00:00 2001 From: aliaksah Date: Wed, 21 Jun 2023 17:35:19 +0200 Subject: [PATCH 18/24] added properly prefiltering --- R/gmjmcmc.R | 2 +- man/plot.gmjmcmc_merged.Rd | 2 +- man/plot.mjmcmc_parallel.Rd | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/R/gmjmcmc.R b/R/gmjmcmc.R index 3c68ad8..455aacb 100644 --- a/R/gmjmcmc.R +++ b/R/gmjmcmc.R @@ -72,7 +72,7 @@ gmjmcmc <- function (data, loglik.pi = gaussian.loglik, loglik.alpha = gaussian. if (p != P) N <- N.init else N <- N.final # Precalculate covariates and put them in data.t - if (p != 1) data.t <- precalc.features(data, S[[p]]) + if (p != 1 || length(params$prel.filter)>0) data.t <- precalc.features(data, S[[p]]) else data.t <- data # Initialize first model of population model.cur <- as.logical(rbinom(n = length(S[[p]]), size = 1, prob = 0.5)) diff --git a/man/plot.gmjmcmc_merged.Rd b/man/plot.gmjmcmc_merged.Rd index 7b37a8d..1239cb5 100644 --- a/man/plot.gmjmcmc_merged.Rd +++ b/man/plot.gmjmcmc_merged.Rd @@ -4,7 +4,7 @@ \alias{plot.gmjmcmc_merged} \title{Plot a gmjmcmc_merged run} \usage{ -\method{plot}{gmjmcmc_merged}(x) +\method{plot}{gmjmcmc_merged}(x, count = "all") } \description{ Plot a gmjmcmc_merged run diff --git a/man/plot.mjmcmc_parallel.Rd b/man/plot.mjmcmc_parallel.Rd index 51c3490..ac381a2 100644 --- a/man/plot.mjmcmc_parallel.Rd +++ b/man/plot.mjmcmc_parallel.Rd @@ -4,7 +4,7 @@ \alias{plot.mjmcmc_parallel} \title{Plot a mjmcmc_parallel run} \usage{ -\method{plot}{mjmcmc_parallel}(x) +\method{plot}{mjmcmc_parallel}(x, count = "all") } \description{ Plot a mjmcmc_parallel run -- GitLab From 04ecde345323cd70fa1794dad483090e6cff05f2 Mon Sep 17 00:00:00 2001 From: aliaksah Date: Thu, 22 Jun 2023 14:25:17 +0200 Subject: [PATCH 19/24] not function added to handle logic regressions --- R/likelihoods.R | 2 ++ R/nonlinear_functions.R | 9 +++++++++ 2 files changed, 11 insertions(+) diff --git a/R/likelihoods.R b/R/likelihoods.R index 3b8099d..061dbae 100644 --- a/R/likelihoods.R +++ b/R/likelihoods.R @@ -102,3 +102,5 @@ linear.g.prior.loglik <- function (y, x, model, complex, params = list(g = 4)) { logmarglik <- 0.5 * (log(1 + params$g) * (n - p) - log(1 + params$g * (1 - rsquared)) * (n - 1)) * (p != 1) return(logmarglik) } + + diff --git a/R/nonlinear_functions.R b/R/nonlinear_functions.R index 7d7174b..99642f0 100644 --- a/R/nonlinear_functions.R +++ b/R/nonlinear_functions.R @@ -281,3 +281,12 @@ hs <- function(x) as.integer(x>0) #' #' @export nhs nhs <- function(x) as.integer(x<0) + +#' not x +#' +#' @param x The vector of binary values +#' @return 1-x +#' +#' @export not +not <- function(x) (1-x) + -- GitLab From 6397c16a87339330fc19026d71bb5571f19f48ec Mon Sep 17 00:00:00 2001 From: aliaksah Date: Thu, 22 Jun 2023 16:37:38 +0200 Subject: [PATCH 20/24] prefiltering fixed properly --- .gitignore | 3 ++- R/gmjmcmc.R | 7 ++++--- R/parallel.R | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index 0b71f38..bccc9fa 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ inst/doc -run_algorithm.R \ No newline at end of file +run_algorithm.R +.Rproj.user diff --git a/R/gmjmcmc.R b/R/gmjmcmc.R index 455aacb..20e903e 100644 --- a/R/gmjmcmc.R +++ b/R/gmjmcmc.R @@ -59,10 +59,10 @@ gmjmcmc <- function (data, loglik.pi = gaussian.loglik, loglik.alpha = gaussian. # Create first population F.0 <- gen.covariates(ncol(data) - 2) - if (is.null(params$prel.filter)) + if (is.null(params$feat$prel.filter)) S[[1]] <- F.0 else - S[[1]] <- F.0[params$prel.filter] + S[[1]] <- F.0[params$feat$prel.filter] complex <- complex.features(S[[1]]) @@ -72,8 +72,9 @@ gmjmcmc <- function (data, loglik.pi = gaussian.loglik, loglik.alpha = gaussian. if (p != P) N <- N.init else N <- N.final # Precalculate covariates and put them in data.t - if (p != 1 || length(params$prel.filter)>0) data.t <- precalc.features(data, S[[p]]) + if (length(params$feat$prel.filter)>0 | p!=1) data.t <- precalc.features(data, S[[p]]) else data.t <- data + # Initialize first model of population model.cur <- as.logical(rbinom(n = length(S[[p]]), size = 1, prob = 0.5)) model.cur.res <- loglik.pre(loglik.pi, model.cur, complex, data.t, params$loglik) diff --git a/R/parallel.R b/R/parallel.R index ab09ffa..53d7351 100644 --- a/R/parallel.R +++ b/R/parallel.R @@ -23,6 +23,6 @@ gmjmcmc.parallel <- function (runs, cores = getOption("mc.cores", 2L), merge.opt gmjmcmc(data = data, loglik.pi = loglik.pi, loglik.alpha = loglik.alpha, transforms = transforms, ...) }, mc.cores = cores) class(results) <- "gmjmcmc_parallel" - merged <- merge.results(results, merge.options$populations, merge.options$complex.measure, merge.options$tol) + merged <- merge.results(results, merge.options$populations, merge.options$complex.measure, merge.options$tol,data = data) return(merged) } -- GitLab From 62ff3bb7a9fc548db93b32e2c9bb77a78b4bb588 Mon Sep 17 00:00:00 2001 From: aliaksah Date: Fri, 23 Jun 2023 12:18:46 +0200 Subject: [PATCH 21/24] prel filter set to null --- R/arguments.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/arguments.R b/R/arguments.R index ae0b597..b05b5cd 100644 --- a/R/arguments.R +++ b/R/arguments.R @@ -107,7 +107,7 @@ gen.params.gmjmcmc <- function (data) { alpha = 0, # alpha strategy (0 = None, 1,2,3 = strategies as per Hubin et al.) TODO: Fully Bayesian pop.max = as.integer(ncov * 1.5), # Max features population size keep.org = F, # Always keep original covariates in every population - prel.filter = 0, # Filtration threshold for first population (i.e. filter covariates even if keep.org=T) + prel.filter = NULL, # Filtration threshold for first population (i.e. filter covariates even if keep.org=T) keep.min = 0.8, # Minimum proportion of features to always keep [0,1] eps = 0.05, # Inclusion probability limit for feature generation check.col = T, # Whether the colinearity should be checked -- GitLab From 7c118b3b91a8e8fa38bc544d3546acac156caf88 Mon Sep 17 00:00:00 2001 From: Jon Date: Sun, 25 Jun 2023 17:03:03 +0200 Subject: [PATCH 22/24] Add trans.priors to the features operation count - adding the possibility to have different complexities per transformation. --- R/arguments.R | 4 +++- R/feature.R | 5 +++-- R/feature_generation.R | 12 ++++++------ tests/testthat/test_feature.R | 12 ++++++------ tests/testthat/test_mjmcmc.R | 4 ++-- 5 files changed, 20 insertions(+), 17 deletions(-) diff --git a/R/arguments.R b/R/arguments.R index b05b5cd..cd3d062 100644 --- a/R/arguments.R +++ b/R/arguments.R @@ -36,12 +36,14 @@ gen.probs.gmjmcmc <- function (transforms) { ## Feature generation probabilities transcount <- length(transforms) filter <- 0.6 # filtration threshold - gen <- c(0.40,0.40,0.10,0.10) # probability for different feature generation methods + gen <- c(0.40,0.40,0.10,0.10) # probability for different feature generation methods trans <- rep(1 / transcount, transcount) # probability for each different nonlinear transformation + trans_priors <- rep(1, transcount) # Default values assigned to each transformation to be used as "operation count". probs$filter <- filter probs$gen <- gen probs$trans <- trans + probs$trans_priors <- trans_priors return(probs) } diff --git a/R/feature.R b/R/feature.R index ec1712e..76d7270 100644 --- a/R/feature.R +++ b/R/feature.R @@ -21,7 +21,7 @@ #' @param transform A numeric denoting the transform type #' @param features A list of features to include #' @param alphas A numeric vector denoting the alphas to use -create.feature <- function (transform, features, alphas=NULL) { +create.feature <- function (transform, features, trans.priors, alphas=NULL) { # Given no alphas, assume no intercept and unit coefficients if (is.null(alphas)) alphas <- c(0, rep(1, length(features))) if (length(alphas) != (length(features) + 1)) stop("Invalid alpha/feature count") @@ -33,7 +33,8 @@ create.feature <- function (transform, features, alphas=NULL) { } else { depth <- 0 # Assume 0 depth to find the deepest included feature - oc <- length(features) # Every + is an operation, and the outer transform is also one + oc <- length(features) - 1 # Every + is an operation, and the outer transform is also one + oc <- oc + trans.priors[transform] width <- length(features) # Width is the number of features for (i in 1:(length(features))) { locdepth <- depth.feature(features[[i]]) diff --git a/R/feature_generation.R b/R/feature_generation.R index 0f17e25..492e24e 100644 --- a/R/feature_generation.R +++ b/R/feature_generation.R @@ -11,14 +11,14 @@ gen.multiplication <- function (features, marg.probs) { } # Generate a modification feature -gen.modification <- function (features, marg.probs, trans.probs) { +gen.modification <- function (features, marg.probs, trans.probs, trans.priors) { feat <- sample.int(n = length(features), size = 1, prob = marg.probs) trans <- sample.int(n = length(trans.probs), size = 1, prob = trans.probs) - create.feature(trans, features[feat]) + create.feature(trans, features[feat], trans.priors) } # Generate a projection feature -gen.projection <- function (features, marg.probs, trans.probs, max.width, max.size) { +gen.projection <- function (features, marg.probs, trans.probs, max.width, max.size, trans.priors) { if (!is.null(max.size)) { max.width <- min(max.width, max.size + 1) } @@ -27,7 +27,7 @@ gen.projection <- function (features, marg.probs, trans.probs, max.width, max.si trans <- sample.int(n = length(trans.probs), size = 1, prob = trans.probs) # TODO: Generate alphas properly using various methods alphas <- rep(1, length(feats)+1) - create.feature(trans, features[feats], alphas) + create.feature(trans, features[feats], trans.priors, alphas) } # Generate new features from the initial covariates @@ -43,8 +43,8 @@ gen.feature <- function (features, marg.probs, data, loglik.alpha, probs, F.0.si while (!feat.ok && tries < 50) { feat.type <- sample.int(n = 4, size = 1, prob = probs$gen) if (feat.type == 1) feat <- gen.multiplication(features, marg.probs) - if (feat.type == 2) feat <- gen.modification(features, marg.probs, probs$trans) - if (feat.type == 3) feat <- gen.projection(features, marg.probs, probs$trans, params$L, params$max.proj.size) + if (feat.type == 2) feat <- gen.modification(features, marg.probs, probs$trans, probs$trans_priors) + if (feat.type == 3) feat <- gen.projection(features, marg.probs, probs$trans, params$L, params$max.proj.size, probs$trans_priors) if (feat.type == 4) feat <- gen.new(features, F.0.size) # Check that the feature is not too wide or deep if (!(depth.feature(feat) > params$D || width.feature(feat) > params$L)) { diff --git a/tests/testthat/test_feature.R b/tests/testthat/test_feature.R index a02025f..3d23ae4 100644 --- a/tests/testthat/test_feature.R +++ b/tests/testthat/test_feature.R @@ -8,9 +8,9 @@ context("Feature class") test_that("Simple features are created", { transforms <- list("sin", "cos") set.transforms(transforms) - feature1 <- create.feature(1, 1) # sin(x1) - feature2 <- create.feature(1, list(1,2)) # sin(1*x1+1*x2) - feature3 <- create.feature(0, list(feature1,feature2)) # (sin(x1)*sin(1*x1+1*x2)) + feature1 <- create.feature(1, 1, 1) # sin(x1) + feature2 <- create.feature(1, list(1,2), 1) # sin(1*x1+1*x2) + feature3 <- create.feature(0, list(feature1,feature2), 1) # (sin(x1)*sin(1*x1+1*x2)) expect_equal(print(feature1), "sin(x1)") expect_equal(print(feature2), "sin(1*x1+1*x2)") expect_equal(print(feature3), "(sin(x1)*sin(1*x1+1*x2))") @@ -19,8 +19,8 @@ test_that("Simple features are created", { test_that("Projection feature is created", { transforms <- list("sin", "cos") set.transforms(transforms) - feature1 <- create.feature(1, 1) # sin(x1) - feature2 <- create.feature(1, list(1,2)) # sin(x1+x2) - feature3 <- create.feature(2, list(feature1,feature2,3), c(0.2,-1.5,7,9)) # cos(0.2+-1.5*sin(x1)+7*sin(1*x1+1*x2)+9*x3) + feature1 <- create.feature(1, 1, 1) # sin(x1) + feature2 <- create.feature(1, list(1,2), 1) # sin(x1+x2) + feature3 <- create.feature(2, list(feature1,feature2,3), c(1, 1), c(0.2,-1.5,7,9)) # cos(0.2+-1.5*sin(x1)+7*sin(1*x1+1*x2)+9*x3) expect_equal(print(feature3), "cos(0.2+-1.5*sin(x1)+7*sin(1*x1+1*x2)+9*x3)") }) \ No newline at end of file diff --git a/tests/testthat/test_mjmcmc.R b/tests/testthat/test_mjmcmc.R index f928725..996506f 100644 --- a/tests/testthat/test_mjmcmc.R +++ b/tests/testthat/test_mjmcmc.R @@ -22,7 +22,7 @@ test_that("Testing MJMCMC algorithm", { plot(resm) predm <- predict(resm, cbind(1, data[, -1, drop = FALSE])) - resg <- gmjmcmc(data, loglik.tester, NULL, c("logi", "exp")) + resg <- gmjmcmc(data, loglik.tester, NULL, c("p0", "exp.dbl")) summary(resg) plot(resg) prediction <- predict(resg, cbind(1, data[, -1, drop = FALSE])) @@ -32,7 +32,7 @@ test_that("Testing MJMCMC algorithm", { plot(respm) pred_pm <- predict(respm, cbind(1, data[, -1, drop = FALSE])) - respg <- gmjmcmc.parallel(2, 2, NULL, data, loglik.tester, NULL, c("logi", "exp")) + respg <- gmjmcmc.parallel(2, 2, NULL, data, loglik.tester, NULL, c("p0", "exp.dbl")) summary(respg) plot(respg) pred_pg <- predict(respg, cbind(1, data[, -1, drop = FALSE])) -- GitLab From 7662afdb11fae6c56e4e599297df9d5d31c70f09 Mon Sep 17 00:00:00 2001 From: jonlachmann Date: Fri, 30 Jun 2023 20:10:42 +0200 Subject: [PATCH 23/24] Update docs. --- NAMESPACE | 1 + man/cos.rad.Rd | 17 +++++++++++++++++ man/create.feature.Rd | 2 +- man/erf.Rd | 17 +++++++++++++++++ man/exp.dbl.Rd | 17 +++++++++++++++++ man/gelu.Rd | 17 +++++++++++++++++ man/hs.Rd | 17 +++++++++++++++++ man/ngelu.Rd | 17 +++++++++++++++++ man/nhs.Rd | 17 +++++++++++++++++ man/not.Rd | 17 +++++++++++++++++ man/nrelu.Rd | 23 +++++++++++++++++++++++ man/p0.Rd | 17 +++++++++++++++++ man/p05.Rd | 17 +++++++++++++++++ man/p0p0.Rd | 17 +++++++++++++++++ man/p0p05.Rd | 17 +++++++++++++++++ man/p0p1.Rd | 17 +++++++++++++++++ man/p0p2.Rd | 17 +++++++++++++++++ man/p0p3.Rd | 17 +++++++++++++++++ man/p0pm05.Rd | 17 +++++++++++++++++ man/p0pm1.Rd | 17 +++++++++++++++++ man/p0pm2.Rd | 17 +++++++++++++++++ man/p2.Rd | 17 +++++++++++++++++ man/p3.Rd | 17 +++++++++++++++++ man/pm05.Rd | 17 +++++++++++++++++ man/pm1.Rd | 17 +++++++++++++++++ man/pm2.Rd | 17 +++++++++++++++++ man/relu.Rd | 17 +++++++++++++++++ man/sin.rad.Rd | 17 +++++++++++++++++ 28 files changed, 450 insertions(+), 1 deletion(-) create mode 100644 man/cos.rad.Rd create mode 100644 man/erf.Rd create mode 100644 man/exp.dbl.Rd create mode 100644 man/gelu.Rd create mode 100644 man/hs.Rd create mode 100644 man/ngelu.Rd create mode 100644 man/nhs.Rd create mode 100644 man/not.Rd create mode 100644 man/nrelu.Rd create mode 100644 man/p0.Rd create mode 100644 man/p05.Rd create mode 100644 man/p0p0.Rd create mode 100644 man/p0p05.Rd create mode 100644 man/p0p1.Rd create mode 100644 man/p0p2.Rd create mode 100644 man/p0p3.Rd create mode 100644 man/p0pm05.Rd create mode 100644 man/p0pm1.Rd create mode 100644 man/p0pm2.Rd create mode 100644 man/p2.Rd create mode 100644 man/p3.Rd create mode 100644 man/pm05.Rd create mode 100644 man/pm1.Rd create mode 100644 man/pm2.Rd create mode 100644 man/relu.Rd create mode 100644 man/sin.rad.Rd diff --git a/NAMESPACE b/NAMESPACE index ac2b264..8231eb8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -40,6 +40,7 @@ export(mjmcmc.parallel) export(model.string) export(ngelu) export(nhs) +export(not) export(nrelu) export(p0) export(p05) diff --git a/man/cos.rad.Rd b/man/cos.rad.Rd new file mode 100644 index 0000000..3d69cbb --- /dev/null +++ b/man/cos.rad.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/nonlinear_functions.R +\name{cos.rad} +\alias{cos.rad} +\title{Cosine function for degrees} +\usage{ +\method{cos}{rad}(x) +} +\arguments{ +\item{x}{The vector of values in degrees} +} +\value{ +The cosine of x +} +\description{ +Cosine function for degrees +} diff --git a/man/create.feature.Rd b/man/create.feature.Rd index 697b527..8c5fd79 100644 --- a/man/create.feature.Rd +++ b/man/create.feature.Rd @@ -4,7 +4,7 @@ \alias{create.feature} \title{Create method for "feature" class} \usage{ -create.feature(transform, features, alphas = NULL) +create.feature(transform, features, trans.priors, alphas = NULL) } \arguments{ \item{transform}{A numeric denoting the transform type} diff --git a/man/erf.Rd b/man/erf.Rd new file mode 100644 index 0000000..dd8c854 --- /dev/null +++ b/man/erf.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/nonlinear_functions.R +\name{erf} +\alias{erf} +\title{erf function} +\usage{ +erf(x) +} +\arguments{ +\item{x}{The vector of values} +} +\value{ +2 * pnorm(x * sqrt(2)) - 1 +} +\description{ +erf function +} diff --git a/man/exp.dbl.Rd b/man/exp.dbl.Rd new file mode 100644 index 0000000..2b8e1e2 --- /dev/null +++ b/man/exp.dbl.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/nonlinear_functions.R +\name{exp.dbl} +\alias{exp.dbl} +\title{Double exponential function} +\usage{ +\method{exp}{dbl}(x) +} +\arguments{ +\item{x}{The vector of values} +} +\value{ +e^(-abs(x)) +} +\description{ +Double exponential function +} diff --git a/man/gelu.Rd b/man/gelu.Rd new file mode 100644 index 0000000..61ce351 --- /dev/null +++ b/man/gelu.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/nonlinear_functions.R +\name{gelu} +\alias{gelu} +\title{GELU function} +\usage{ +gelu(x) +} +\arguments{ +\item{x}{The vector of values} +} +\value{ +x*pnorm(x) +} +\description{ +GELU function +} diff --git a/man/hs.Rd b/man/hs.Rd new file mode 100644 index 0000000..f63fa66 --- /dev/null +++ b/man/hs.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/nonlinear_functions.R +\name{hs} +\alias{hs} +\title{heavy side function} +\usage{ +hs(x) +} +\arguments{ +\item{x}{The vector of values} +} +\value{ +as.integer(x>0) +} +\description{ +heavy side function +} diff --git a/man/ngelu.Rd b/man/ngelu.Rd new file mode 100644 index 0000000..bb5c88e --- /dev/null +++ b/man/ngelu.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/nonlinear_functions.R +\name{ngelu} +\alias{ngelu} +\title{Negative GELU function} +\usage{ +ngelu(x) +} +\arguments{ +\item{x}{The vector of values} +} +\value{ +-x*pnorm(-x) +} +\description{ +Negative GELU function +} diff --git a/man/nhs.Rd b/man/nhs.Rd new file mode 100644 index 0000000..93d87af --- /dev/null +++ b/man/nhs.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/nonlinear_functions.R +\name{nhs} +\alias{nhs} +\title{negative heavy side function} +\usage{ +nhs(x) +} +\arguments{ +\item{x}{The vector of values} +} +\value{ +as.integer(x<0) +} +\description{ +negative heavy side function +} diff --git a/man/not.Rd b/man/not.Rd new file mode 100644 index 0000000..6754bd1 --- /dev/null +++ b/man/not.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/nonlinear_functions.R +\name{not} +\alias{not} +\title{not x} +\usage{ +not(x) +} +\arguments{ +\item{x}{The vector of binary values} +} +\value{ +1-x +} +\description{ +not x +} diff --git a/man/nrelu.Rd b/man/nrelu.Rd new file mode 100644 index 0000000..0403328 --- /dev/null +++ b/man/nrelu.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/nonlinear_functions.R +\name{nrelu} +\alias{nrelu} +\title{negative ReLu function} +\usage{ +nrelu(x) + +nrelu(x) +} +\arguments{ +\item{x}{The vector of values} +} +\value{ +max(x,0) + +max(-x,0) +} +\description{ +negative ReLu function + +negative ReLu function +} diff --git a/man/p0.Rd b/man/p0.Rd new file mode 100644 index 0000000..ceb4e29 --- /dev/null +++ b/man/p0.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/nonlinear_functions.R +\name{p0} +\alias{p0} +\title{p0 polynomial term} +\usage{ +p0(x) +} +\arguments{ +\item{x}{The vector of values} +} +\value{ +log(abs(x) + .Machine$double.eps) +} +\description{ +p0 polynomial term +} diff --git a/man/p05.Rd b/man/p05.Rd new file mode 100644 index 0000000..9dcaad8 --- /dev/null +++ b/man/p05.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/nonlinear_functions.R +\name{p05} +\alias{p05} +\title{p05 polynomial term} +\usage{ +p05(x) +} +\arguments{ +\item{x}{The vector of values} +} +\value{ +(abs(x)+.Machine$double.eps)^(0.5) +} +\description{ +p05 polynomial term +} diff --git a/man/p0p0.Rd b/man/p0p0.Rd new file mode 100644 index 0000000..bc2a153 --- /dev/null +++ b/man/p0p0.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/nonlinear_functions.R +\name{p0p0} +\alias{p0p0} +\title{p0p0 polynomial term} +\usage{ +p0p0(x) +} +\arguments{ +\item{x}{The vector of values} +} +\value{ +p0(x)*p0(x) +} +\description{ +p0p0 polynomial term +} diff --git a/man/p0p05.Rd b/man/p0p05.Rd new file mode 100644 index 0000000..af225da --- /dev/null +++ b/man/p0p05.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/nonlinear_functions.R +\name{p0p05} +\alias{p0p05} +\title{p0p05 polynomial term} +\usage{ +p0p05(x) +} +\arguments{ +\item{x}{The vector of values} +} +\value{ +p0(x)*(abs(x)+.Machine$double.eps)^(0.5) +} +\description{ +p0p05 polynomial term +} diff --git a/man/p0p1.Rd b/man/p0p1.Rd new file mode 100644 index 0000000..cdbfe79 --- /dev/null +++ b/man/p0p1.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/nonlinear_functions.R +\name{p0p1} +\alias{p0p1} +\title{p0p1 polynomial term} +\usage{ +p0p1(x) +} +\arguments{ +\item{x}{The vector of values} +} +\value{ +p0(x)*x +} +\description{ +p0p1 polynomial term +} diff --git a/man/p0p2.Rd b/man/p0p2.Rd new file mode 100644 index 0000000..10b00ac --- /dev/null +++ b/man/p0p2.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/nonlinear_functions.R +\name{p0p2} +\alias{p0p2} +\title{p0p2 polynomial term} +\usage{ +p0p2(x) +} +\arguments{ +\item{x}{The vector of values} +} +\value{ +p0(x)*x^(2) +} +\description{ +p0p2 polynomial term +} diff --git a/man/p0p3.Rd b/man/p0p3.Rd new file mode 100644 index 0000000..49c5037 --- /dev/null +++ b/man/p0p3.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/nonlinear_functions.R +\name{p0p3} +\alias{p0p3} +\title{p0p3 polynomial term} +\usage{ +p0p3(x) +} +\arguments{ +\item{x}{The vector of values} +} +\value{ +p0(x)*x^(3) +} +\description{ +p0p3 polynomial term +} diff --git a/man/p0pm05.Rd b/man/p0pm05.Rd new file mode 100644 index 0000000..1b7d587 --- /dev/null +++ b/man/p0pm05.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/nonlinear_functions.R +\name{p0pm05} +\alias{p0pm05} +\title{p0pm05 polynomial term} +\usage{ +p0pm05(x) +} +\arguments{ +\item{x}{The vector of values} +} +\value{ +p0(x)*sign(x)*(abs(x)+.Machine$double.eps)^(-0.5) +} +\description{ +p0pm05 polynomial term +} diff --git a/man/p0pm1.Rd b/man/p0pm1.Rd new file mode 100644 index 0000000..f27275e --- /dev/null +++ b/man/p0pm1.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/nonlinear_functions.R +\name{p0pm1} +\alias{p0pm1} +\title{p0pm1 polynomial terms} +\usage{ +p0pm1(x) +} +\arguments{ +\item{x}{The vector of values} +} +\value{ +p0(x)*(x+.Machine$double.eps)^(-1) +} +\description{ +p0pm1 polynomial terms +} diff --git a/man/p0pm2.Rd b/man/p0pm2.Rd new file mode 100644 index 0000000..74038dd --- /dev/null +++ b/man/p0pm2.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/nonlinear_functions.R +\name{p0pm2} +\alias{p0pm2} +\title{p0pm2 polynomial term} +\usage{ +p0pm2(x) +} +\arguments{ +\item{x}{The vector of values} +} +\value{ +p0(x)*sign(x)*(abs(x)+.Machine$double.eps)^(-2) +} +\description{ +p0pm2 polynomial term +} diff --git a/man/p2.Rd b/man/p2.Rd new file mode 100644 index 0000000..b25d140 --- /dev/null +++ b/man/p2.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/nonlinear_functions.R +\name{p2} +\alias{p2} +\title{p2 polynomial term} +\usage{ +p2(x) +} +\arguments{ +\item{x}{The vector of values} +} +\value{ +x^(2) +} +\description{ +p2 polynomial term +} diff --git a/man/p3.Rd b/man/p3.Rd new file mode 100644 index 0000000..e5e0a78 --- /dev/null +++ b/man/p3.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/nonlinear_functions.R +\name{p3} +\alias{p3} +\title{p3 polynomial term} +\usage{ +p3(x) +} +\arguments{ +\item{x}{The vector of values} +} +\value{ +x^(3) +} +\description{ +p3 polynomial term +} diff --git a/man/pm05.Rd b/man/pm05.Rd new file mode 100644 index 0000000..5fa201f --- /dev/null +++ b/man/pm05.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/nonlinear_functions.R +\name{pm05} +\alias{pm05} +\title{pm05 polynomial term} +\usage{ +pm05(x) +} +\arguments{ +\item{x}{The vector of values} +} +\value{ +(abs(x)+.Machine$double.eps)^(-0.5) +} +\description{ +pm05 polynomial term +} diff --git a/man/pm1.Rd b/man/pm1.Rd new file mode 100644 index 0000000..d7b4aba --- /dev/null +++ b/man/pm1.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/nonlinear_functions.R +\name{pm1} +\alias{pm1} +\title{pm1 polynomial term} +\usage{ +pm1(x) +} +\arguments{ +\item{x}{The vector of values} +} +\value{ +sign(x)*(abs(x)+.Machine$double.eps)^(-1) +} +\description{ +pm1 polynomial term +} diff --git a/man/pm2.Rd b/man/pm2.Rd new file mode 100644 index 0000000..d269314 --- /dev/null +++ b/man/pm2.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/nonlinear_functions.R +\name{pm2} +\alias{pm2} +\title{pm2 polynomial term} +\usage{ +pm2(x) +} +\arguments{ +\item{x}{The vector of values} +} +\value{ +sign(x)*(abs(x)+.Machine$double.eps)^(-2) +} +\description{ +pm2 polynomial term +} diff --git a/man/relu.Rd b/man/relu.Rd new file mode 100644 index 0000000..8229a82 --- /dev/null +++ b/man/relu.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/nonlinear_functions.R +\name{relu} +\alias{relu} +\title{ReLu function} +\usage{ +relu(x) +} +\arguments{ +\item{x}{The vector of values} +} +\value{ +max(x,0) +} +\description{ +ReLu function +} diff --git a/man/sin.rad.Rd b/man/sin.rad.Rd new file mode 100644 index 0000000..930b9d0 --- /dev/null +++ b/man/sin.rad.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/nonlinear_functions.R +\name{sin.rad} +\alias{sin.rad} +\title{Sine function for degrees} +\usage{ +\method{sin}{rad}(x) +} +\arguments{ +\item{x}{The vector of values in degrees} +} +\value{ +The sine of x +} +\description{ +Sine function for degrees +} -- GitLab From 46e872cf3a0bda199f2fb4bdfd53862a6c0b60a4 Mon Sep 17 00:00:00 2001 From: jonlachmann Date: Fri, 30 Jun 2023 20:17:09 +0200 Subject: [PATCH 24/24] Code style. --- R/gmjmcmc.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/gmjmcmc.R b/R/gmjmcmc.R index 20e903e..dabc3c3 100644 --- a/R/gmjmcmc.R +++ b/R/gmjmcmc.R @@ -72,7 +72,7 @@ gmjmcmc <- function (data, loglik.pi = gaussian.loglik, loglik.alpha = gaussian. if (p != P) N <- N.init else N <- N.final # Precalculate covariates and put them in data.t - if (length(params$feat$prel.filter)>0 | p!=1) data.t <- precalc.features(data, S[[p]]) + if (length(params$feat$prel.filter) > 0 | p != 1) data.t <- precalc.features(data, S[[p]]) else data.t <- data # Initialize first model of population -- GitLab