diff --git a/NAMESPACE b/NAMESPACE index 5f0e6f7466716a4739e179a2c021250df8e00dbf..3571fc9cb29e27a2ffa98f42faf7b88856545552 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -22,11 +22,15 @@ export(erf) export(exp_dbl) export(fbms) export(fbms.mlik.master) +export(fbms.mlik.master_old) export(gauss) export(gaussian.loglik) export(gaussian.loglik.alpha) export(gaussian.loglik.g) +export(gaussian.loglik2.alpha) +export(gaussian.loglik2.g) export(gaussian_tcch_log_likelihood) +export(gaussian_tcch_log_likelihood2) export(gelu) export(gen.params.gmjmcmc) export(gen.params.mjmcmc) @@ -34,6 +38,7 @@ export(gen.probs.gmjmcmc) export(gen.probs.mjmcmc) export(get.best.model) export(get.mpm.model) +export(glm.loglik) export(glm.logpost.bas) export(gmjmcmc) export(gmjmcmc.parallel) @@ -43,6 +48,9 @@ export(log_prior) export(logistic.loglik) export(logistic.loglik.ala) export(logistic.loglik.alpha) +export(logistic.loglik2) +export(logistic.loglik2.ala) +export(logistic.loglik2.alpha) export(marginal.probs) export(merge_results) export(mjmcmc) @@ -92,8 +100,8 @@ importFrom(BAS,hypergeometric1F1) importFrom(BAS,hypergeometric2F1) importFrom(BAS,intrinsic) importFrom(BAS,phi1) +importFrom(BAS,robust) importFrom(BAS,tCCH) -importFrom(BAS,testBF.prior) importFrom(BAS,uniform) importFrom(GenSA,GenSA) importFrom(Rcpp,sourceCpp) @@ -131,6 +139,7 @@ importFrom(stats,rbinom) importFrom(stats,rnorm) importFrom(stats,runif) importFrom(stats,sd) +importFrom(stats,terms) importFrom(stats,var) importFrom(tolerance,F1) importFrom(utils,sessionInfo) diff --git a/R/alpha_generation.R b/R/alpha_generation.R index c3511ddd2afae309c84e3cc0abac1a0acc425768..24587a117bdbdfbdd59a8ddc8ea8b305840bab29 100644 --- a/R/alpha_generation.R +++ b/R/alpha_generation.R @@ -5,7 +5,7 @@ gen.alphas <- function (strategy, feature, data, loglik, verbose) { if (strategy == "deep") feature <- alpha_3(feature, data, loglik, verbose) - else if (strategy == "random") feature <- alpha_4(feature) else stop("Not implemented.") + else if (strategy == "random") feature <- alpha_4(feature, data) else stop("Not implemented.") return(feature) } @@ -39,7 +39,7 @@ alpha_2 <- function (feature) { #' alpha_3 <- function (feature, data, loglik, verbose) { # Create the string representation of the feature with variable alphas - featfun <- print.feature(feature, dataset = TRUE, alphas = TRUE) + featfun <- print.feature(feature, dataset = TRUE, fixed = data$fixed, alphas = TRUE) featfun <- set_alphas(featfun) # Return if there are no alphas to set if (featfun$count == 0) return(feature) @@ -71,9 +71,9 @@ alpha_3 <- function (feature, data, loglik, verbose) { #' #' @noRd #' -alpha_4 <- function (feature) { +alpha_4 <- function (feature, data) { # Create the string representation of the feature with variable alphas - featfun <- print.feature(feature, dataset = TRUE, alphas = TRUE) + featfun <- print.feature(feature, dataset = TRUE, fixed = data$fixed, alphas = TRUE) featfun <- set_alphas(featfun) feature <- update.alphas(feature, rnorm(featfun$count,0,1)) diff --git a/R/arguments.R b/R/arguments.R index 854b4f2b719913fac2ede507fec57518f9f309e9..82a685d56d25031d7898912e0d7d9d6cad55aa75 100644 --- a/R/arguments.R +++ b/R/arguments.R @@ -121,7 +121,7 @@ gen.probs.gmjmcmc <- function (transforms) { #' Generate a parameter list for MJMCMC (Mode Jumping MCMC) #' -#' @param data The dataset that will be used in the algorithm +#' @param ncov The number of covariates in the dataset that will be used in the algorithm #' #' @return A list of parameters to use when running the mjmcmc function. #' @@ -187,13 +187,9 @@ gen.probs.gmjmcmc <- function (transforms) { #' #' #' @export gen.params.mjmcmc -gen.params.mjmcmc <- function (data) { +gen.params.mjmcmc <- function (ncov) { ### Create a list of parameters for the algorithm - ## Get the dimensions of the data to set parameters based on it - data.dim <- data.dims(data) - ncov <- data.dim[2] - 2 - ## Local optimization parameters sa_kern <- list(probs=c(0.1, 0.05, 0.2, 0.3, 0.2, 0.15), neigh.size=1, neigh.min=1, neigh.max=2) # Simulated annealing proposal kernel parameters @@ -215,7 +211,7 @@ gen.params.mjmcmc <- function (data) { mh_params <- list(neigh.size = 1, neigh.min = 1, neigh.max = 2) # Regular MH parameters ## Compile the list and return params <- list(burn_in=burn_in, mh=mh_params, large=large_params, random=random_params, - sa=sa_params, greedy=greedy_params, loglik=list()) + sa=sa_params, greedy=greedy_params, mlpost=list()) return(params) } @@ -224,7 +220,7 @@ gen.params.mjmcmc <- function (data) { #' #' This function generates the full list of parameters required for the Generalized Mode Jumping Markov Chain Monte Carlo (GMJMCMC) algorithm, building upon the parameters from \code{gen.params.mjmcmc}. The generated parameter list includes feature generation settings, population control parameters, and optimization controls for the search process. #' -#' @param data A data frame containing the dataset with covariates and response variable. +#' @param ncov The number of covariates in the dataset that will be used in the algorithm #' @return A list of parameters for controlling GMJMCMC behavior: #' #' @section Feature Generation Parameters (\code{feat}): @@ -307,23 +303,21 @@ gen.params.mjmcmc <- function (data) { #' #' @examples #' data <- data.frame(y = rnorm(100), x1 = rnorm(100), x2 = rnorm(100)) -#' params <- gen.params.gmjmcmc(data) +#' params <- gen.params.gmjmcmc(ncol(data) - 1) #' str(params) #' #' @seealso \code{\link{gen.params.mjmcmc}}, \code{\link{gmjmcmc}} #' #' @export gen.params.gmjmcmc -gen.params.gmjmcmc <- function (data) { +gen.params.gmjmcmc <- function (ncov) { # Get mjmcmc params - params <- gen.params.mjmcmc(data) - - ncov <- ncol(data) - 2 + params <- gen.params.mjmcmc(ncov) feat_params <- list(D = 5, L = 15, # Hard limits on feature complexity alpha = "unit", # alpha strategy ("unit" = None, "deep" strategy 3 from Hubin et al., "random" fully Bayesian strategy) - pop.max = min(100,as.integer(ncov * 1.5)), # Max features population size + pop.max = min(100, as.integer(ncov * 1.5)), # Max features population size keep.org = FALSE, # Always keep original covariates in every population - prel.filter = 0, # Filtration threshold for first population (i.e. filter covariates even if keep.org=TRUE) + prel.filter = 0, # Filtration threshold for first population (i.e. filter covariates even if keep.org=TRUE) keep.min = 0.8, # Minimum proportion of features to always keep [0,1] eps = 0.05, # Inclusion probability limit for feature generation check.col = TRUE, # Whether the colinearity should be checked @@ -333,9 +327,9 @@ gen.params.gmjmcmc <- function (data) { # Large jump parameters large_params <- list( - neigh.size = min(as.integer(params$feat$pop.max * 0.35),as.integer(ncov * 0.35),35), - neigh.min = min(as.integer(params$feat$pop.max * 0.35),as.integer(ncov * 0.25),25), - neigh.max = min(as.integer(params$feat$pop.max * 0.35),as.integer(ncov * 0.45),45) + neigh.size = min(as.integer(params$feat$pop.max * 0.35), as.integer(ncov * 0.35), 35), + neigh.min = min(as.integer(params$feat$pop.max * 0.35), as.integer(ncov * 0.25), 25), + neigh.max = min(as.integer(params$feat$pop.max * 0.35), as.integer(ncov * 0.45), 45) ) params$large <- large_params diff --git a/R/diagnostics.R b/R/diagnostics.R index f27deb26fbe94c0ec7f2132d6d70427f01cc0ec4..f34629bc2533207fb445c7f4468fb04911c370c1 100644 --- a/R/diagnostics.R +++ b/R/diagnostics.R @@ -17,7 +17,10 @@ #' @return A list of summary statistics for checking convergence with given confidence intervals #' #' @examples -#' result <- gmjmcmc(matrix(rnorm(600), 100), P = 2, gaussian.loglik, NULL, c("p0", "exp_dbl")) +#' result <- gmjmcmc( y = matrix(rnorm(100), 100), +#' x = matrix(rnorm(600), 100), +#' P = 2, +#' transforms = c("p0", "exp_dbl")) #' diagnstats <- diagn_plot(result) #' #' @export diff --git a/R/fbms.R b/R/fbms.R index c4e4e60a9074fd60843767813dee6268d5bef1f3..ced37f173b82357e5a7d769b4fd5a8e1301479b3 100644 --- a/R/fbms.R +++ b/R/fbms.R @@ -5,7 +5,37 @@ #' family, data, transforms, and other parameters to customize the model. #' #' @param formula A formula object specifying the model structure. Default is NULL. -#' @param family The distribution family of the response variable. Currently supports "gaussian", "binomial" and "custom". Default is "gaussian". +#' @param family The distribution family of the response variable. Currently supports "gaussian", "binomial", "poisson", "gamma", and "custom". Default is "gaussian". +#' @param beta_prior Type of prior as a string (default: "g-prior" with a = max(n, p^2)). Possible values include: +#' - "beta.prime": Beta-prime prior (GLM/Gaussian, no additional args) +#' - "CH": Compound Hypergeometric prior (GLM/Gaussian, requires `a`, `b`, optionally `s`) +#' - "EB-local": Empirical Bayes local prior (GLM/Gaussian, requires `a` for Gaussian) +#' - "EB-global": Empirical Bayes local prior (Gaussian, requires `a` for Gaussian) +#' - "g-prior": Zellner's g-prior (GLM/Gaussian, requires `g`) +#' - "hyper-g": Hyper-g prior (GLM/Gaussian, requires `a`) +#' - "hyper-g-n": Hyper-g/n prior (GLM/Gaussian, requires `a`) +#' - "tCCH": Truncated Compound Hypergeometric prior (GLM/Gaussian, requires `a`, `b`, `s`, `rho`, `v`, `k`) +#' - "intrinsic": Intrinsic prior (GLM/Gaussian, no additional args) +#' - "TG": Truncated Gamma prior (GLM/Gamma, requires `a`, `s`) +#' - "Jeffreys": Jeffreys prior (GLM/Gaussian, no additional args) +#' - "uniform": Uniform prior (GLM/Gaussian, no additional args) +#' - "benchmark": Benchmark prior (Gaussian/GLM, no additional args) +#' - "ZS-adapted": Zellner-Siow adapted prior (Gaussian TCCH, no additional args) +#' - "robust": Robust prior (Gaussian/GLM, no additional args) +#' - "Jeffreys-BIC": Jeffreys prior with BIC approximation of marginal likelihood (Gaussian/GLM) +#' - "ZS-null": Zellner-Siow null prior (Gaussian, requires `a`) +#' - "ZS-full": Zellner-Siow full prior (Gaussian, requires `a`) +#' - "hyper-g-laplace": Hyper-g Laplace prior (Gaussian, requires `a`) +#' - "AIC": AIC prior from BAS (Gaussian, requires penalty `a`) +#' - "BIC": BIC prior from BAS (Gaussian/GLM) +#' - "JZS": Jeffreys-Zellner-Siow prior (Gaussian, requires `a`) +#' - r: Model complexity penalty (default: 1/n) +#' - g: Tuning parameter for g-prior (default: max(n, p^2)) +#' - a, b, s, v, rho, k: Hyperparameters for various priors +#' - n: Sample size for some priors (default: length(y)) +#' - var: Variance assumption for Gaussian models ("known" or "unknown", default: "unknown") +#' - laplace: Logical for Laplace approximation in GLM only (default: FALSE) +#' @param model_prior a list with parameters of model priors, by default r should be provided #' @param loglik.pi Custom function to compute the logarithm of the posterior mode based on logarithm of marginal likelihood and logarithm of prior functions (needs specification only used if family = "custom") #' @param data A data frame containing the variables in the model. If NULL, the variables are taken from the environment of the formula. Default is NULL. #' @param method Which fitting algorithm should be used, currently implemented options include "gmjmcmc", "gmjmcmc.parallel", "mjmcmc" and "mjmcmc.parallel" with "mjmcmc" being the default and 'mjmcmc' means that only linear models will be estimated @@ -28,30 +58,59 @@ #' cores = 1 #' ) #' summary(fbms_result) -#' plot(fbms_result) #' #' #' @seealso \code{\link{mjmcmc}}, \code{\link{gmjmcmc}}, \code{\link{gmjmcmc.parallel}} #' @export -fbms <- function(formula = NULL, family = "gaussian", data = NULL, impute = FALSE, - loglik.pi = gaussian.loglik, - method = "mjmcmc", verbose = TRUE, ...) { - if (family == "gaussian") - loglik.pi <- gaussian.loglik - else if (family == "binomial") - loglik.pi <- logistic.loglik - else if (family == "custom") - loglik.pi <- loglik.pi +#' @importFrom stats terms +fbms <- function ( + formula = NULL, + family = "gaussian", + beta_prior = list(type = "g-prior"), + model_prior = NULL, + data = NULL, + impute = FALSE, + loglik.pi = NULL, + method = "mjmcmc", + verbose = TRUE, + ... +) { + + if(length(data) == 0) + stop("Training data must be provided!") + + if(length(model_prior) == 0) + model_prior = list(r = 1/dim(data)[1]) + if (family != "custom") { + mlpost_params <- model_prior + loglik.pi <- select.mlpost.fun(beta_prior$type, family) + if(family == "gaussian") + mlpost_params$beta_prior <- gen.mlpost.params.lm(beta_prior$type, beta_prior, ncol(data) - 1, nrow(data)) + else + { + mlpost_params$beta_prior <- gen.mlpost.params.glm(beta_prior$type, beta_prior, ncol(data) - 1, nrow(data)) + mlpost_params$beta_prior$type <- beta_prior$type + } + } else { + if (family == "gaussian") + loglik.pi <- gaussian.loglik + else if (family == "binomial") + loglik.pi <- logistic.loglik + else if (family == "custom") + loglik.pi <- loglik.pi + } + + if (!is.null(formula)) { if (missing(data)) { data <- environment(formula) } na.opt <- getOption("na.action") - if(impute) - options(na.action='na.pass') + if (impute) + options(na.action = 'na.pass') else - options(na.action='na.omit') + options(na.action = 'na.omit') mf <- match.call(expand.dots = FALSE) m <- match(c("formula", "data"), names(mf), 0L) mf <- mf[c(1L, m)] @@ -59,65 +118,272 @@ fbms <- function(formula = NULL, family = "gaussian", data = NULL, impute = FALS mf[[1L]] <- quote(stats::model.frame) mf <- eval(mf, parent.frame()) - Y <- model.response(mf, "any") - X <- model.matrix(formula, data = data)[, -1] + X <- model.matrix(formula, data = data) + intercept <- attr(terms(formula, data = data), "intercept") == 1 + if (intercept) X <- X[, -1, drop = FALSE] mis.Y <- which(is.na(Y)) - if(length(mis.Y)>0) - { + if (length(mis.Y) > 0) { warning("Missing values in the response. Dropped.") - df <- data.frame(Y[-c(mis.Y)], X[-c(mis.Y),]) - } else df <- data.frame(Y, X) + Y <- Y[-c(mis.Y)] + X <- X[-c(mis.Y), ] + } - mis.All <- sum(is.na(df)) + mis.X <- sum(is.na(X)) imputed <- NULL - if(impute & mis.All>0) - { + if (impute && mis.X > 0) { print("Imputing missing values!") - na.matr <- data.frame(1*(is.na(df))) - names(na.matr) <- paste0("mis_",names(na.matr)) + na.matr <- data.frame(1 * (is.na(X))) + names(na.matr) <- paste0("mis_", names(na.matr)) cm <- colMeans(na.matr) - na.matr <- na.matr[,cm!=0] - for (i in seq_along(df)){ - df[[i]][is.na(df[[i]])] <- median(df[[i]], na.rm = TRUE) + na.matr <- na.matr[, cm != 0] + for (i in seq_along(X)) { + X[[i]][is.na(X[[i]])] <- median(X[[i]], na.rm = TRUE) } - imputed <- names(df)[cm!=0] - df <- data.frame(df,na.matr) + imputed <- names(X)[cm != 0] + X <- data.frame(X, na.matr) rm(na.matr) rm(cm) print("Continue to sampling!") - } else if(mis.All>0){ + } else if (mis.X > 0) { print("Dropping missing values!") } } else { - df <- data + Y <- data[, 1] + X <- data[, -1, drop = FALSE] + intercept <- TRUE imputed <- NULL na.opt <- getOption("na.action") - if(impute) - { - options(na.action='na.pass') + if (impute) { + options(na.action = 'na.pass') stop("Imputation is only implemented when formula is provided.\n Please specify formula and rerun!") + } else { + options(na.action = 'na.omit') } - else - options(na.action='na.omit') } if (method == "mjmcmc.parallel") - res <- mjmcmc.parallel(df, loglik.pi, verbose = verbose, ...) + res <- mjmcmc.parallel(x = X, y = Y, loglik.pi = loglik.pi, mlpost_params = mlpost_params, intercept = intercept, verbose = verbose, ...) else if (method == "mjmcmc") - res <- mjmcmc(df, loglik.pi, verbose = verbose, ...) - else if (method == "gmjmcmc.parallel") { - res <- gmjmcmc.parallel(data = df, loglik.pi = loglik.pi, verbose = verbose,...) - } - + res <- mjmcmc(x = X, y = Y, loglik.pi = loglik.pi, mlpost_params = mlpost_params, intercept = intercept, verbose = verbose, ...) + else if (method == "gmjmcmc.parallel") + res <- gmjmcmc.parallel(x = X, y = Y, loglik.pi = loglik.pi, mlpost_params = mlpost_params, intercept = intercept, verbose = verbose,...) else if (method == "gmjmcmc") - res <- gmjmcmc(df, loglik.pi, verbose = verbose, ...) + res <- gmjmcmc(x = X, y = Y, loglik.pi = loglik.pi, mlpost_params = mlpost_params, intercept = intercept, verbose = verbose, ...) else - stop("Error: Method must be one of gmjmcmc, gmjmcmc.parallel,mjmcmc or mjmcmc.parallel!") + stop("Error: Method must be one of gmjmcmc, gmjmcmc.parallel, mjmcmc or mjmcmc.parallel!") attr(res, "imputed") <- imputed - attr(res, "all_names") <- names(df)[1:(dim(df)[2]-1)] - options(na.action=na.opt) + attr(res, "all_names") <- names(X)[1:(dim(X)[2] - 1)] + options(na.action = na.opt) return(res) -} \ No newline at end of file +} + +gen.mlpost.params.glm <- function (beta_prior, user_params, p, n) { + + if(beta_prior == "Jeffreys-BIC") + { + return(NULL) + } else if(beta_prior == "beta.prime") { + return(BAS::beta.prime(n = n)) + } else if (beta_prior == "CH") { + check_required_params(c("a", "b", "s"), user_params, beta_prior) + return(BAS::CCH(alpha = user_params$a, beta = user_params$b, s = user_params$s)) + } else if (beta_prior == "EB-local") { + return(BAS::EB.local()) + } else if (beta_prior == "g-prior") { + if (is.null(user_params$g)) { + user_params$g <- max(p^2, n) + } + return(BAS::g.prior(user_params$g)) + } else if (beta_prior == "hyper-g") { + check_required_params("a", user_params, beta_prior) + params <- BAS::hyper.g(alpha = user_params$a) + params$method <- 1 + return(params) + } else if (beta_prior == "tCCH") { + check_required_params(c("a", "b", "s", "rho", "v", "k"), user_params, beta_prior) + return(BAS::tCCH( + alpha = user_params$a, + beta = user_params$b, + s = user_params$s, + r = user_params$rho, + v = user_params$v, + theta = user_params$k + )) + } else if (beta_prior == "intrinsic") { + return(BAS::intrinsic(n = n)) + } else if (beta_prior == "TG") { + check_required_params("a", user_params, beta_prior) + return(BAS::TG(alpha = user_params$a)) + } else if (beta_prior == "Jeffreys") { + return(BAS::Jeffreys()) + } else if (beta_prior == "uniform") { + return(BAS::tCCH(alpha = 2, beta = 2, s = 0, r = 0, v = 1, theta = 1)) + } else if (beta_prior == "benchmark") { + return(BAS::tCCH(alpha = 0.02, beta = 0.02 * max(n, p^2), s = 0, r = 0, v = 1, theta = 1)) + } else if (beta_prior == "ZS-adapted") { + return(BAS::tCCH(alpha = 1, beta = 2, s = n + 3, r = 0, v = 1, theta = 1)) + } else if (beta_prior == "robust") { + return(BAS::robust(n = as.numeric(n)))# important to cast to numeric for BAS, do not change. + } else if (beta_prior == "hyper-g-n") { + if (is.null(user_params$a)) user_params$a <- 3 + return(BAS::hyper.g.n(alpha = user_params$a, n = n)) + } else if (beta_prior == "BIC") { + return(BAS::bic.prior(n = n)) + } + stop("Unknown prior, please verify your inputs.") +} + + +gen.mlpost.params.lm <- function (beta_prior, user_params, p, n) { + + if (beta_prior == "Jeffreys-BIC") { + if(length(user_params$var)==0) + { + user_params$var <- "unknown" + } + return(list(var = user_params$var)) + }else if (beta_prior == "beta.prime") { + return(list(type = "beta.prime")) + } else if (beta_prior == "CH") { + check_required_params(c("a", "b", "s"), user_params, beta_prior) + user_params <- list(type = + "CH", + a = user_params$a, + b = user_params$b, + s = user_params$s + ) + return(user_params) + } else if (beta_prior == "tCCH") { + check_required_params(c("a", "b", "s", "rho", "v", "k"), user_params, beta_prior) + user_params <- list( + type = "tCCH", + a = user_params$a, + b = user_params$b, + s = user_params$s, + rho = user_params$rho, + v = user_params$v, + k = user_params$k + ) + return(user_params) + } else if (beta_prior == "intrinsic") { + return(list(type = "intrinsic")) + } else if (beta_prior == "TG") { + check_required_params(c("a", "s"), user_params, beta_prior) + user_params <- list( + type = "TG", + a = user_params$a, + s = user_params$s + ) + return(user_params) + } else if (beta_prior == "Jeffreys") { + return(list(type = "Jeffreys")) + } else if (beta_prior == "ZS-adapted") { + return(list(type = "ZS-adapted")) + } else if (beta_prior == "benchmark") { + return(list(type = "benchmark")) + } else if (beta_prior == "robust") { + return(list(type = "robust")) + } else if (beta_prior == "uniform") { + return(list(type = "uniform")) + } else{ + if (!is.null(user_params$a)) { + alpha <- user_params$a + } else { + if (beta_prior == "g-prior") { + alpha <- min(p^2, n) + } else { + alpha <- -1 #check how BAS uses the default + } + } + if (beta_prior == "g-prior") { + return(list(method = 0, alpha = alpha)) + } else if (beta_prior == "hyper-g") { + return(list(method = 1, alpha = alpha)) + } else if (beta_prior == "EB-local") { + return(list(method = 2, alpha = alpha)) + } else if (beta_prior == "BIC") { + return(list(method = 3, alpha = alpha)) + } else if (beta_prior == "ZS-null") { + return(list(method = 4, alpha = alpha)) + } else if (beta_prior == "ZS-full") { + return(list(method = 5, alpha = alpha)) + } else if (beta_prior == "hyper-g-laplace") { + return(list(method = 6, alpha = alpha)) + } else if (beta_prior == "AIC") { + return(list(method = 7, alpha = alpha)) + } else if (beta_prior == "EB-global") { + return(list(method = 2, alpha = alpha)) + } else if (beta_prior == "JZS") { + return(list(method = 9, alpha = alpha)) + } else if (beta_prior == "hyper-g-n") { + return(list(method = 8, alpha = alpha)) + } else { + stop("Unrecognized prior_beta for Gaussian GLM: ", beta_prior) + } + } +} + +check_required_params <- function (required, user_params, beta_prior) { + for (req in required) { + if (is.null(user_params[[req]]) || !is.numeric(user_params[[req]])) { + par_names <- paste0(required, collapse = ", ") + stop(paste0("The parameters: ", par_names, " must be provided for the ", beta_prior, " prior.")) + return(FALSE) + } + } + return(TRUE) +} + +select.mlpost.fun <- function (beta_prior, family) { + if (!(family %in% c("binomial", "poisson", "gamma", "gaussian"))) { + stop(paste0( + "Unsupported family: ", family, ". Supported families are 'binomial', 'poisson', 'gamma', or 'gaussian'." + )) + } + + gaussian_only_priors <- c("ZS-null", "ZS-full", "hyper-g-laplace","BIC", "AIC", "JZS","EB-global") + gaussian_not_robust <- c("CH", "tCCH", "ZS-adapted", "TG", "beta.prime", "benchmark", "Jeffreys") + gaussian_robust <- c("g-prior", "hyper-g", "EB-local","BIC", "Jeffreys-BIC", "ZS-null", "ZS-full", "hyper-g-laplace", + "AIC", "hyper-g-n", "JZS") + gaussian_tcch <- c("CH", "tCCH", "TG","beta.prime", "intrinsic", "ZS-adapted", "uniform","Jeffreys", "benchmark", "robust") + gaussian_bas <- c("g-prior", "hyper-g", "EB-local","ZS-null", "ZS-full", "BIC", "hyper-g-laplace", "AIC", "EB-global", "hyper-g-n", "JZS") + glm_priors <- c("CH", "tCCH", "TG","beta.prime", "EB-local", "g-prior", "hyper-g", "hyper-g-n", + "intrinsic", "ZS-adapted", "Jeffreys", "uniform", "benchmark", "robust", "Jeffreys-BIC") + + if (family %in% c("binomial", "poisson", "gamma")) { + if (beta_prior %in% gaussian_only_priors) { + stop(paste0( + "Prior ", beta_prior, " is not supported for the GLM family", family, + ". Supported priors are: ", paste(glm_priors, collapse = ", ") + )) + } + if (beta_prior == "Jeffreys-BIC") { + if (family == "binomial") { + return(logistic.loglik) + } else { + return(glm.loglik) + } + } else { + return(glm.logpost.bas) + } + } else if (family == "gaussian") { + if (beta_prior %in% gaussian_not_robust) { + warning(paste0( + "Prior ", beta_prior, " is not recommended for Gaussian family models as it may be unstable for strong signals (R^2 > 0.9).", + "Recommended priors under the Gaussian family are: ", paste(gaussian_robust, collapse = ", ") + )) + } + if (beta_prior %in% gaussian_tcch) { + return(gaussian_tcch_log_likelihood) + } else if (beta_prior == "Jeffreys-BIC") { + return(gaussian.loglik) + } else if (beta_prior %in% gaussian_bas) { + return(lm.logpost.bas) + } + } + stop("Unknown prior, please verify your inputs.") +} + diff --git a/R/feature.R b/R/feature.R index 08071753ae6c6baa93311db8fac502096d4bcdc2..05c350737c11bda66de7ed43e660541e580ca30d 100644 --- a/R/feature.R +++ b/R/feature.R @@ -97,6 +97,7 @@ update.alphas <- function (feature, alphas, recurse=FALSE) { #' #' @param x An object of class "feature" #' @param dataset Set the regular covariates as columns in a dataset +#' @param fixed How many of the first columns in dataset are fixed and do not contribute to variable selection #' @param alphas Print a "?" instead of actual alphas to prepare the output for alpha estimation #' @param labels Should the covariates be named, or just referred to as their place in the data.frame. #' @param round Should numbers be rounded when printing? Default is FALSE, otherwise it can be set to the number of decimal places. @@ -105,11 +106,14 @@ update.alphas <- function (feature, alphas, recurse=FALSE) { #' @return String representation of a feature #' #' @examples -#' result <- gmjmcmc(matrix(rnorm(600), 100), P = 2, gaussian.loglik, NULL, c("p0", "exp_dbl")) +#' result <- gmjmcmc(x = matrix(rnorm(600), 100), +#' y = matrix(rnorm(100), 100), +#' P = 2, +#' transforms = c("p0", "exp_dbl")) #' print(result$populations[[1]][1]) #' #' @export -print.feature <- function (x, dataset = FALSE, alphas = FALSE, labels = FALSE, round = FALSE, ...) { +print.feature <- function (x, dataset = FALSE, fixed = 0, alphas = FALSE, labels = FALSE, round = FALSE, ...) { fString <- "" feat <- x[[length(x)]] # This is a more complex feature @@ -145,15 +149,15 @@ print.feature <- function (x, dataset = FALSE, alphas = FALSE, labels = FALSE, r if (alphas) fString <- paste0(fString, "?*") else fString <- paste0(fString, feat[j,3], "*") } - fString <- paste0(fString, print.feature(x[[feat[j, 2]]], dataset, alphas, labels, round), op) + fString <- paste0(fString, print.feature(x[[feat[j, 2]]], dataset, fixed, alphas, labels, round), op) } } fString <- paste0(fString, ")") } # This is a plain covariate else if (is.numeric(feat)) { - if (dataset) fString <- paste0("data[,", feat + 2, "]") - else if (labels[1] != F) fString <- labels[feat] + if (dataset) fString <- paste0("data$x[,", fixed + feat, "]") + else if (labels[1] != FALSE) fString <- labels[feat] else fString <- paste0("x", feat) } else stop("Invalid feature structure") return(fString) diff --git a/R/feature_generation.R b/R/feature_generation.R index 999384cba88c5e07931189098ce1500c5c9e3e7a..2911f9fa2abd4a1d9bfce957cd414365cf871cf4 100644 --- a/R/feature_generation.R +++ b/R/feature_generation.R @@ -75,26 +75,30 @@ check.collinearity <- function (proposal, features, F.0.size, data, mock) { # Add the proposal to the feature list for evaluation features[[length(features) + 1]] <- proposal # Generate mock data to test with (avoiding too costly computations) - if (mock) - mock.data <- matrix(c(runif((F.0.size * 2), -100, 100), rep(1, F.0.size * 2), - runif((F.0.size * 2) * (F.0.size), -100, 100)), F.0.size * 2, F.0.size + 2) - else - mock.data <- check.data(data[seq_len(min(F.0.size * 2, dim(data)[1])), ], FALSE) + n <- F.0.size * 2 + if (mock) { + mock.data <- list(x = matrix(runif(n * (F.0.size), -100, 100), n, F.0.size), + y = matrix(runif(n * (ncol(data$y)), -100, 100), n, ncol(data$y)), + fixed = data$fixed) + } else { + obs_idx <- seq_len(min(n, nrow(data$x))) + mock.data <- check.data(data$x[obs_idx, ], data$y[obs_idx, ], data$fixed, FALSE) + } # Use the mock data to precalc the features mock.data.precalc <- precalc.features(mock.data, features) # Fit a linear model with the mock data precalculated features - linearmod <- lm(as.data.frame(mock.data.precalc[, -2])) + linearmod <- lm.fit(mock.data.precalc$x, mock.data.precalc$y) # Check if all coefficients were possible to calculate if (sum(is.na(linearmod$coefficients)) == 0) return(FALSE) else return(TRUE) } # Generate features to represent the covariates, just takes the count needed -gen.covariates <- function (count) { +gen.covariates <- function (data) { features <- list() - for (i in 1:count) { + for (i in seq_len(ncol(data$x) - data$fixed)) { features <- c(features, i) - class(features[[i]]) <- "feature" + class(features[[length(features)]]) <- "feature" } return(features) } \ No newline at end of file diff --git a/R/gmjmcmc.R b/R/gmjmcmc.R index 5cd999d647ff46bcbddea509a2abc3932d31d013..d07b5931b21797d0a4b928da543d97b119c107eb 100644 --- a/R/gmjmcmc.R +++ b/R/gmjmcmc.R @@ -10,9 +10,11 @@ NULL #' Main algorithm for GMJMCMC (Genetically Modified MJMCMC) #' -#' @param data A matrix containing the data to use in the algorithm, -#' first column should be the dependent variable, -#' and the rest of the columns should be the independent variables. +#' @param x matrix containing the design matrix with data to use in the algorithm, +#' @param y response variable +#' @param mlpost_params parameters for the estimator function loglik.pi +#' @param intercept whether intercept should be added to the design matrix (no model selection for intercept) +#' @param fixed how many of the first columns of the design matrix will always be included in the models #' @param loglik.pi The (log) density to explore #' @param loglik.alpha The likelihood function to use for alpha calculation #' @param transforms A Character vector including the names of the non-linear functions to be used by the modification @@ -40,16 +42,23 @@ NULL #' \item{best}{Best marginal model probability throughout the run, represented as the maximum value in \code{unlist(best.margs)}.} #' #' @examples -#' result <- gmjmcmc(matrix(rnorm(600), 100), P = 2, gaussian.loglik, NULL, c("p0", "exp_dbl")) +#' result <- gmjmcmc(y = matrix(rnorm(100), 100), +#' x = matrix(rnorm(600), 100), +#' P = 2, +#' transform = c("p0", "exp_dbl")) #' summary(result) #' plot(result) #' #' @export gmjmcmc gmjmcmc <- function ( - data, - loglik.pi = gaussian.loglik, + x, + y, + loglik.pi = fbms.mlik.master, loglik.alpha = gaussian.loglik.alpha, + mlpost_params = list(family = "gaussian", beta_prior = list(type = "robust")), transforms, + intercept = TRUE, + fixed = 0, P = 10, N.init = 100, N.final = 100, @@ -59,12 +68,16 @@ gmjmcmc <- function ( verbose = TRUE ) { # Verify that the data is well-formed - labels <- names(data)[-1] - data <- check.data(data, verbose) + if (intercept) { + x <- cbind(1, x) + fixed <- fixed + 1 + } + data <- check.data(x, y, fixed, verbose) # Generate default probabilities and parameters if there are none supplied. if (is.null(probs)) probs <- gen.probs.gmjmcmc(transforms) - if (is.null(params)) params <- gen.params.gmjmcmc(data) + if (is.null(params)) params <- gen.params.gmjmcmc(ncol(data$x) - data$fixed) + if (!is.null(mlpost_params)) params$mlpost <- mlpost_params # Extract labels from column names in dataframe labels <- get.labels(data, verbose) @@ -88,7 +101,7 @@ gmjmcmc <- function ( best.margs <- vector("list", P) # Create first population - F.0 <- gen.covariates(ncol(data) - 2) + F.0 <- gen.covariates(data) if (is.null(params$prel.select)) S[[1]] <- F.0 else @@ -107,7 +120,7 @@ gmjmcmc <- function ( # Initialize first model of population model.cur <- as.logical(rbinom(n = length(S[[p]]), size = 1, prob = 0.5)) - model.cur.res <- loglik.pre(loglik.pi, model.cur, complex, data.t, params$loglik, NULL, FALSE) + model.cur.res <- loglik.pre(loglik.pi, model.cur, complex, data.t, params$mlpost, NULL, FALSE) model.cur <- list(prob = 0, model = model.cur, coefs = model.cur.res$coefs, crit = model.cur.res$crit, alpha = 0) best.crit <- model.cur$crit # Reset first best criteria value @@ -160,6 +173,8 @@ gmjmcmc <- function ( transforms = transforms # Transformations used by the model ) results$labels <- labels + results$fixed <- fixed + results$intercept <- intercept attr(results, "class") <- "gmjmcmc" return(results) } @@ -185,8 +200,7 @@ gmjmcmc <- function ( gmjmcmc.transition <- function (S.t, F.0, data, loglik.alpha, marg.probs.F.0, marg.probs, labels, probs, params, verbose = TRUE) { # Sample which features to keep based on marginal inclusion below probs$filter feats.keep <- as.logical(rbinom(n = length(marg.probs), size = 1, prob = pmin(marg.probs / probs$filter, 1))) - - + # Always keep original covariates if that setting is on if (params$keep.org) { if (params$prel.filter > 0) { @@ -196,24 +210,21 @@ gmjmcmc.transition <- function (S.t, F.0, data, loglik.alpha, marg.probs.F.0, ma else feats.keep[seq_along(F.0)] <- T } - # Avoid removing too many features if (length(feats.keep) > 0 && mean(feats.keep) < params$keep.min & sum(feats.keep) < params$pop.max/2) { feats.add.n <- round((params$keep.min - mean(feats.keep)) * length(feats.keep)) feats.add <- sample(which(!feats.keep), feats.add.n) - if((length(feats.add) + sum(feats.keep))>=params$pop.max) + if ((length(feats.add) + sum(feats.keep)) >= params$pop.max) feats.keep[feats.add] <- T } - if(sum(feats.keep)>params$pop.max) - { + if (sum(feats.keep)>params$pop.max) { warning("Number of features to keep greater than pop.max! Continue with first pop.max features to be kept! \n Ignore if the final set of features with high probabilities is smaller than the specified $feat$pop.max \n Otherwise check your tuning parameters and increase $feat$pop.max or probs$filter!") feats.keep[which(feats.keep==TRUE)[(params$pop.max+1):length(which(feats.keep==TRUE))]] <- FALSE } - # Create a list of which features to replace feats.replace <- which(!feats.keep) @@ -223,18 +234,15 @@ gmjmcmc.transition <- function (S.t, F.0, data, loglik.alpha, marg.probs.F.0, ma marg.probs.use <- c(rep(params$eps, length(F.0)), pmin(pmax(marg.probs, params$eps), (1-params$eps))) # Perform the replacements - if(length(S.t)>params$pop.max) + if (length(S.t) > params$pop.max) feats.replace <- sort(feats.replace,decreasing = T) for (i in feats.replace) { prev.size <- length(S.t) - prev.feat.string <- print.feature(S.t[[i]], labels=labels, round = 2) - if(prev.size>params$pop.max) - { + prev.feat.string <- print.feature(S.t[[i]], labels = labels, round = 2) + if (prev.size > params$pop.max) { cat("Removed feature", prev.feat.string, "\n") S.t[[i]] <- NULL - } - else - { + } else { S.t[[i]] <- gen.feature(c(F.0, S.t), marg.probs.use, data, loglik.alpha, probs, length(F.0), params, verbose) if (prev.size > length(S.t)) { if (verbose) { diff --git a/R/gmjmcmc_support.R b/R/gmjmcmc_support.R index db52330fdf26b61e2b24d3b397d58dd09db08c8c..3d970b81635b0b95aba6935b767db0ae3b372f3b 100644 --- a/R/gmjmcmc_support.R +++ b/R/gmjmcmc_support.R @@ -44,7 +44,10 @@ verify.inputs <- function (data, loglik.pi, transforms, T, N, N.final, probs, pa #' @return A numeric vector of marginal model probabilities based on relative frequencies of model visits in MCMC. #' #' @examples -#' result <- gmjmcmc(matrix(rnorm(600), 100), P = 2, gaussian.loglik, NULL, c("p0", "exp_dbl")) +#' result <- gmjmcmc(x = matrix(rnorm(600), 100), +#' y = matrix(rnorm(100), 100), +#' P = 2, +#' transforms = c("p0", "exp_dbl")) #' marginal.probs(result$models[[1]]) #' #' @export @@ -96,20 +99,20 @@ marginal.probs.renorm <- function (models, type = "features") { # Function for precalculating features for a new feature population precalc.features <- function (data, features) { - precalc <- matrix(NA, nrow(data), length(features) + 2) - precalc[, 1:2] <- data[, 1:2] + precalc <- matrix(NA, nrow(data$x), length(features)) for (f in seq_along(features)) { - feature_string <- print.feature(features[[f]], dataset = TRUE) - precalc[, (f + 2)] <- eval(parse(text = feature_string)) + feature_string <- print.feature(features[[f]], dataset = TRUE, fixed = data$fixed) + precalc[, f] <- eval(parse(text = feature_string)) } # Replace any -Inf and Inf values caused by under- or overflow precalc <- replace.infinite.data.frame(precalc) - return(precalc) + data$x <- cbind(data$x[, seq_len(data$fixed)], precalc) + return(data) } # TODO: Compare to previous mliks here instead, also add a flag to do that in full likelihood estimation scenarios. # Function to call the model function -loglik.pre <- function (loglik.pi, model, complex, data, params = NULL, visited.models = visited.models, sub = sub) { +loglik.pre <- function (loglik.pi, model, complex, data, params = NULL, visited.models, sub) { if (!is.null(visited.models) && has_key(visited.models, model)) { if (!sub) { return(visited.models[[model]]) @@ -121,7 +124,7 @@ loglik.pre <- function (loglik.pi, model, complex, data, params = NULL, visited. # Get the complexity measures for just this model complex <- list(width = complex$width[model], oc = complex$oc[model], depth = complex$depth[model]) # Call the model estimator with the data and the model, note that we add the intercept to every model - model.res <- loglik.pi(data[, 1], data[, -1], c(T, model), complex, params) + model.res <- loglik.pi(data$y, data$x, c(rep(TRUE, data$fixed), model), complex, params) # Check that the critical value is acceptable if (!is.numeric(model.res$crit) || is.nan(model.res$crit)) model.res$crit <- -.Machine$double.xmax # Alpha cannot be calculated if the current and proposed models have crit which are -Inf or Inf @@ -135,34 +138,29 @@ loglik.pre <- function (loglik.pi, model, complex, data, params = NULL, visited. # Function to check the data # Checks that there is an intercept in the data, adds it if missing # Coerces the data to be of type matrix -check.data <- function (data, verbose) { - if (!is.matrix(data)) { - data <- as.matrix(data) - if (verbose) cat("Data coerced to matrix type.\n") +check.data <- function (x, y, fixed, verbose) { + if (!is.matrix(x)) { + x <- as.matrix(x) + if (verbose) cat("Data (x) coerced to matrix type.\n") } - if (sum(data[, 2] == 1) != nrow(data)) { - data <- cbind(data[, 1], 1, data[, -1]) - if (verbose) cat("Intercept added to data.\n") + if (!is.matrix(y)) { + y <- as.matrix(y) + if (verbose) cat("Data (y) coerced to matrix type.\n") } - return(data) -} - -# Function to get the dimensions of a dataset, adding an intercept if necessary -data.dims <- function (data) { - dims <- dim(data) - if (sum(data[,2] == 1) != nrow(data)) { - dims[2] <- dims[2] + 1 + if (nrow(x) != nrow(y)) { + stop("x and y must have the same number of rows") } - return(dims) + return(list(x = x, y = y, fixed = fixed)) } # Function to extract column names if they are well formed get.labels <- function (data, verbose) { - labels <- colnames(data)[-(1:2)] - if (is.null(labels)) return(F) + labels <- colnames(data$x) + if (is.null(labels)) return(FALSE) if (sum(is.na(labels)) != 0) { if (verbose) cat("NA labels present, using x#\n") - return(F) + return(FALSE) } + if (data$fixed > 0) labels <- labels[-seq_len(data$fixed)] return(labels) } \ No newline at end of file diff --git a/R/likelihoods.R b/R/likelihoods.R index 2016b5f83f15e2f98fa4479ab44d8a50c6ac557c..7d2afcd19b912ea26419b6501022aecb9400726d 100644 --- a/R/likelihoods.R +++ b/R/likelihoods.R @@ -16,67 +16,55 @@ #' @return A list with the log marginal likelihood combined with the log prior (crit) and the posterior mode of the coefficients (coefs). #' #' @examples -#' glm.logpost.bas(as.integer(rnorm(100) > 0),cbind(1,matrix(rnorm(100))),c(TRUE,TRUE),list(oc = 1)) +#' glm.logpost.bas(as.integer(rnorm(100) > 0), +#' cbind(1, matrix(rnorm(100))), +#' c(TRUE, TRUE), +#' list(oc = 1)) #' #' @importFrom BAS uniform Jeffreys g.prior #' @importFrom stats poisson Gamma glm.control #' @export glm.logpost.bas -glm.logpost.bas <- function (y, x, model, complex, params = list(r = exp(-0.5), family = "binomial", prior_beta = Jeffreys(), laplace = FALSE)) { +glm.logpost.bas <- function (y, x, model, complex, params = list(r = NULL, family = "binomial", beta_prior = Jeffreys(), laplace = FALSE)) { if (length(params) == 0) - params <- list(r = 1/dim(x)[1], family = "binomial", prior_beta = g.prior(max(dim(x)[1],sum(model)-1)), laplace = FALSE) + params <- list(r = 1 / dim(x)[1], family = "binomial", beta_prior = g.prior(max(dim(x)[1], sum(model) - 1)), laplace = FALSE) + else if(length(params$r) == 0) + params$r = 1 / dim(x)[1] + if(length(params$laplace) == 0) + params$laplace = FALSE p <- sum(model) - 1 - if(p==0) - { - probinit <- as.numeric(c(1,0.99)) + if (p == 0) { + probinit <- as.numeric(c(1, 0.99)) model[2] <- T - }else{ - probinit <- as.numeric(c(1,rep(0.99,p))) + } else { + probinit <- as.numeric(c(1, rep(0.99, p))) } - mod<-NULL - - tryCatch({ - if(params$family == "binomial") - suppressWarnings({ - mod <- .Call(BAS:::C_glm_deterministic, - y = as.numeric(y), X = as.matrix(x[,model]), - Roffset = as.numeric(rep(0, length(y))), - Rweights = as.numeric(rep(1, length(y))), - Rprobinit = probinit, - Rmodeldim = as.integer(rep(0,ifelse(p==0,2,1))), - modelprior = uniform(), - betaprior = params$prior_beta, - family = binomial(), - Rcontrol = glm.control(), - Rlaplace = as.integer(params$laplace)) - }) - else if(params$family == "poisson") - suppressWarnings({ - mod <- .Call(BAS:::C_glm_deterministic, - y = as.numeric(y), X = as.matrix(x[,model]), - Roffset = as.numeric(rep(0, length(y))), - Rweights = as.numeric(rep(1, length(y))), - Rprobinit = probinit, - Rmodeldim = as.integer(rep(0,ifelse(p==0,2,1))), - modelprior = uniform(), - betaprior = params$prior_beta, - family = poisson(), - Rcontrol = glm.control(), - Rlaplace = as.integer(params$laplace)) - }) + + mod <- NULL + + if (params$family == "binomial") + family_use <- binomial() + else if (params$family == "poisson") + family_use <- poisson() else - suppressWarnings({ - mod <- .Call(BAS:::C_glm_deterministic, - y = as.numeric(y), X = as.matrix(x[,model]), - Roffset = as.numeric(rep(0, length(y))), - Rweights = as.numeric(rep(1, length(y))), - Rprobinit = probinit, - Rmodeldim = as.integer(rep(0,ifelse(p==0,2,1))), - modelprior = uniform(), - betaprior = params$prior_beta, - family = Gamma(), - Rcontrol = glm.control(), - Rlaplace = as.integer(params$laplace)) + family_use <- Gamma() + + + tryCatch({ suppressWarnings({ + mod <- .Call( + BAS:::C_glm_deterministic, + y = as.numeric(y), + X = as.matrix(x[, model]), + Roffset = as.numeric(rep(0, length(y))), + Rweights = as.numeric(rep(1, length(y))), + Rprobinit = probinit, + Rmodeldim = as.integer(rep(0, ifelse(p == 0,2,1))), + modelprior = uniform(), + betaprior = params$beta_prior, + family = family_use, + Rcontrol = glm.control(), + Rlaplace = as.integer(params$laplace) + ) }) }, error = function(e) { # Handle the error by setting result to NULL @@ -84,18 +72,17 @@ glm.logpost.bas <- function (y, x, model, complex, params = list(r = exp(-0.5), # You can also print a message or log the error if needed cat("An error occurred:", conditionMessage(e), "\n") }) - - if(length(mod)==0) { - return(list(crit = -.Machine$double.xmax + log(params$r * sum(complex$oc)),coefs = rep(0,p+1))) + + if (length(mod) == 0 || is.nan(mod$logmarg[2])) { + return(list(crit = -.Machine$double.xmax + log_prior(params, complex), coefs = rep(0, p + 1))) } - - if(p == 0) - { - ret <- mod$logmarg[2] + log(params$r) * sum(complex$oc) - return(list(crit=ret, coefs=mod$mle[[2]])) + + if (p == 0) { + ret <- mod$logmarg[2] + log(params$r) * sum(complex$oc) + return(list(crit = ret, coefs = mod$mle[[2]])) } ret <- mod$logmarg + log(params$r) * sum(complex$oc) - return(list(crit=ret, coefs=mod$mle[[1]])) + return(list(crit = ret, coefs = mod$mle[[1]])) } @@ -107,55 +94,42 @@ glm.logpost.bas <- function (y, x, model, complex, params = list(r = exp(-0.5), #' @param x The matrix containing the precalculated features #' @param model The model to estimate as a logical vector #' @param complex A list of complexity measures for the features -#' @param params A list of parameters for the log likelihood, supplied by the user, important to specify the tuning parameters of beta priors and in Gaussian models +#' @param params A list of parameters for the log likelihood, supplied by the user, important to specify the tuning parameters of beta priors where the corresponding integers as prior_beta must be provided "g-prior" = 0, "hyper-g" = 1, "EB-local" = 2, "BIC" = 3, "ZS-null" = 4, "ZS-full" = 5, "hyper-g-laplace" = 6, "AIC" = 7, "EB-global" = 2, "hyper-g-n" = 8, "JZS" = 9 and in Gaussian models #' #' @return A list with the log marginal likelihood combined with the log prior (crit) and the posterior mode of the coefficients (coefs). #' #' @examples #' lm.logpost.bas(rnorm(100), cbind(1,matrix(rnorm(100))), c(TRUE,TRUE), list(oc = 1)) -#' +#' #' #' @export lm.logpost.bas -lm.logpost.bas <- function (y, x, model, complex, params = list(r = exp(-0.5),prior_beta = "g-prior",alpha = 4)) { +lm.logpost.bas <- function (y, x, model, complex, params = list(r = exp(-0.5), beta_prior = list(method = 1))) { if (length(params) == 0) - params <- list(r = 1/dim(x)[1], prior_beta = "g-prior",alpha = max(dim(x)[1],sum(model)^2)) - - method.num <- switch( - params$prior_beta, - "g-prior" = 0, - "hyper-g" = 1, - "EB-local" = 2, - "BIC" = 3, - "ZS-null" = 4, - "ZS-full" = 5, - "hyper-g-laplace" = 6, - "AIC" = 7, - "EB-global" = 2, - "hyper-g-n" = 8, - "JZS" = 9 - ) - - p <- sum(model) - 1 - if(p==0) - { - probinit <- as.numeric(c(1,0.99)) + params <- list( + r = 1 / dim(x)[1], + beta_prior = list(method = 0, alpha = max(dim(x)[1], sum(model)^2)) + ) else if(length(params$r) == 0) params$r = 1 / dim(x)[1] + + p <- sum(model) - 1 + if (p == 0) { + probinit <- as.numeric(c(1, 0.99)) model[2] <- T - }else{ - probinit <- as.numeric(c(1,rep(0.99,p))) + } else { + probinit <- as.numeric(c(1, rep(0.99, p))) } - - mod<-NULL + + mod <- NULL tryCatch({ suppressWarnings({ mod <- .Call(BAS:::C_deterministic, - y = y, X = as.matrix(x[,model]), + y = y, X = as.matrix(x[, model]), as.numeric(rep(1, length(y))), probinit, - as.integer(rep(0,ifelse(p==0,2,1))), + as.integer(rep(0, ifelse(p == 0,2,1))), incint = as.integer(F), - alpha = as.numeric(params$alpha), - method = as.integer(method.num), + alpha = ifelse(length(params$beta_prior$a) > 0, as.numeric(params$beta_prior$a),NULL), + method = as.integer(params$beta_prior$method), modelprior = uniform(), Rpivot = TRUE, Rtol = 1e-7) @@ -166,18 +140,17 @@ lm.logpost.bas <- function (y, x, model, complex, params = list(r = exp(-0.5),pr # You can also print a message or log the error if needed cat("An error occurred:", conditionMessage(e), "\n") }) - - if(length(mod)==0) { - return(list(crit = -.Machine$double.xmax + log(params$r * sum(complex$oc)),coefs = rep(0,p+1))) + + if (length(mod) == 0 || is.nan(mod$logmarg[2])) { + return(list(crit = -.Machine$double.xmax + log_prior(params, complex), coefs = rep(0, p + 1))) } - - if(p == 0) - { - ret <- mod$logmarg[2] + log(params$r) * sum(complex$oc) - return(list(crit=ret, coefs=mod$mle[[2]])) + + if (p == 0) { + ret <- mod$logmarg[2] + log(params$r) * sum(complex$oc) + return(list(crit = ret, coefs = mod$mle[[2]])) } - ret <- mod$logmarg + log(params$r) * sum(complex$oc) - return(list(crit=ret, coefs=mod$mle[[1]])) + ret <- mod$logmarg + log(params$r) * sum(complex$oc) + return(list(crit = ret, coefs = mod$mle[[1]])) } @@ -195,15 +168,65 @@ lm.logpost.bas <- function (y, x, model, complex, params = list(r = exp(-0.5),pr #' #' @examples #' logistic.loglik(as.integer(rnorm(100) > 0), matrix(rnorm(100)), TRUE, list(oc = 1)) -#' +#' #' #' @export logistic.loglik logistic.loglik <- function (y, x, model, complex, params = list(r = exp(-0.5))) { if (length(params) == 0) - params <- list(r = 1/dim(x)[1]) + params <- list(r = 1 / dim(x)[1]) + else if(length(params$r) == 0) + params$r = 1 / dim(x)[1] suppressWarnings({mod <- fastglm(as.matrix(x[, model]), y, family = binomial())}) + + if (length(mod) == 0 || is.nan(mod$deviance)) { + return(list(crit = -.Machine$double.xmax + log_prior(params, complex), coefs = rep(0,sum(model)))) + } + + ret <- (-(mod$deviance + log(length(y)) * (mod$rank - 1) - 2 * log(params$r) * sum(complex$oc))) / 2 - return(list(crit=ret, coefs=mod$coefficients)) + return(list(crit = ret, coefs = mod$coefficients)) +} + +#' Log likelihood function for glm regression with a Jeffreys parameter prior and BIC approximations of the posterior +#' This function is created as an example of how to create an estimator that is used +#' to calculate the marginal likelihood of a model. +#' +#' @param y A vector containing the dependent variable +#' @param x The matrix containing the precalculated features +#' @param model The model to estimate as a logical vector +#' @param complex A list of complexity measures for the features +#' @param params A list of parameters for the log likelihood, supplied by the user, family must be specified +#' +#' @return A list with the log marginal likelihood combined with the log prior (crit) and the posterior mode of the coefficients (coefs). +#' +#' @examples +#' glm.loglik(abs(rnorm(100))+1, matrix(rnorm(100)), TRUE, list(oc = 1)) +#' +#' +#' @export glm.loglik +glm.loglik <- function (y, x, model, complex, params = list(r = exp(-0.5), family = "Gamma")) { + if (length(params) == 0) + params <- list(r = 1 / dim(x)[1]) + else if(length(params$r) == 0) + params$r = 1 / dim(x)[1] + + if (params$family == "binomial") { + fam = binomial() + } else if (params$family == "poisson") { + fam = poisson() + } else { + fam = Gamma() + } + + #browser() + suppressWarnings({mod <- fastglm(as.matrix(x[, model]), y, family = fam)}) + + if (length(mod) == 0 || is.nan(mod$deviance)) { + return(list(crit = -.Machine$double.xmax + log_prior(params, complex), coefs = rep(0,sum(model)))) + } + + ret <- (-(mod$deviance + log(length(y)) * (mod$rank - 1) - 2 * log(params$r) * sum(complex$oc))) / 2 + return(list(crit = ret, coefs = mod$coefficients)) } @@ -219,24 +242,26 @@ logistic.loglik <- function (y, x, model, complex, params = list(r = exp(-0.5))) #' #' @examples #' gaussian.loglik(rnorm(100), matrix(rnorm(100)), TRUE, list(oc = 1), NULL) -#' +#' #' #' @export gaussian.loglik gaussian.loglik <- function (y, x, model, complex, params) { - if(length(params)==0) + if (sum(model) == 0) + return(list(crit = -Inf, coefs = numeric())) + if (length(params) == 0) params <- list() if (length(params$r) == 0) params$r <- 1/dim(x)[1] - if(length(params$var) == 0) - params$var <- 1 + if (length(params$beta_prior$var) == 0) + params$beta_prior$var <- 1 suppressWarnings({mod <- fastglm(as.matrix(x[, model]), y, family = gaussian())}) - - if(params$var == "unknown") + + if (params$beta_prior$var == "unknown") ret <- (-(mod$aic + (log(length(y))-2) * (mod$rank) - 2 * log(params$r) * (sum(complex$oc)))) / 2 else - ret <- (-(mod$deviance/params$var + log(length(y)) * (mod$rank - 1) - 2 * log(params$r) * (sum(complex$oc)))) / 2 - - return(list(crit=ret, coefs=mod$coefficients)) + ret <- (-(mod$deviance/params$beta_prior$var + log(length(y)) * (mod$rank - 1) - 2 * log_prior(params, complex))) / 2 + + return(list(crit = ret, coefs = mod$coefficients)) } @@ -254,32 +279,37 @@ gaussian.loglik <- function (y, x, model, complex, params) { #' gaussian.loglik.g(rnorm(100), matrix(rnorm(100)), TRUE, list(oc=1)) #' #' @export gaussian.loglik.g -gaussian.loglik.g <- function (y, x, model, complex, params = NULL) -{ - +gaussian.loglik.g <- function (y, x, model, complex, params = NULL) { + if (sum(model) == 0) + return(list(crit = -Inf, coefs = numeric())) + if (length(params) == 0) + params <- list() + if (length(params$r) == 0) + params$r <- 1/dim(x)[1] suppressWarnings({ mod <- fastglm(as.matrix(x[, model]), y, family = gaussian()) }) - # Calculate R-squared y_mean <- mean(y) TSS <- sum((y - y_mean)^2) RSS <- sum(mod$residuals^2) Rsquare <- 1 - (RSS / TSS) - - if (length(params$r) == 0) - { - params$r <- 1/dim(x)[1] - params$g <- max(mod$rank^2,length(y)) + + if (length(params$r) == 0 || length(params$g) == 0) { + params$r <- 1 / dim(x)[1] + if (!is.null(params$beta_prior$g)) + params$g <- params$beta_prior$g + else + params$g <- max(mod$rank^2, length(y)) } - + # logarithm of marginal likelihood - mloglik <- 0.5*(log(1.0 + params$g) * (dim(x)[1] - mod$rank) - log(1.0 + params$g * (1.0 - Rsquare)) * (dim(x)[1] - 1))*(mod$rank!=1) - + mloglik <- 0.5 * (log(1.0 + params$g) * (dim(x)[1] - mod$rank) - log(1.0 + params$g * (1.0 - Rsquare)) * (dim(x)[1] - 1)) * (mod$rank != 1) + # logarithm of model prior # default value or parameter r lp <- log_prior(params, complex) - + return(list(crit = mloglik + lp, coefs = mod$coefficients)) } @@ -299,13 +329,12 @@ gaussian.loglik.g <- function (y, x, model, complex, params = NULL) #' \item{coefs}{Posterior mode of the coefficients.} #' #' @examples -#' gaussian_tcch_log_likelihood(rnorm(100), matrix(rnorm(100)), TRUE, list(oc=1)) +#' gaussian_tcch_log_likelihood(rnorm(100), matrix(rnorm(100)), c(TRUE), list(oc=1)) #' #' @importFrom BAS phi1 hypergeometric1F1 hypergeometric2F1 #' @importFrom tolerance F1 -#' @export -gaussian_tcch_log_likelihood <- function(y, x, model, complex, params = list(r = exp(-0.5), prior_beta = "Intrinsic")) { - +#' @export +gaussian_tcch_log_likelihood <- function(y, x, model, complex, params = list(r = exp(-0.5), beta_prior = list(type = "intrinsic"))) { # Fit the linear model using fastglm fitted_model <- fastglm(as.matrix(x[, model]), y, family = gaussian()) log_likelihood <- -(fitted_model$aic -2 * (fitted_model$rank))/2 @@ -314,130 +343,129 @@ gaussian_tcch_log_likelihood <- function(y, x, model, complex, params = list(r = TSS <- sum((y - y_mean)^2) RSS <- sum(fitted_model$residuals^2) R2_M <- 1 - (RSS / TSS) - + p_M <- fitted_model$rank n <- length(y) - + # Switch-like structure to assign hyperparameters based on prior - if (params$prior_beta == "CH") { + hyper <- params$beta_prior + if (params$beta_prior$type == "CH") { # CH prior: b and s should be user-specified, with defaults if not provided - a <- ifelse(!is.null(params$a),params$a, 1) # Default to 1 if not specified - b <- ifelse(!is.null(params$b),params$b, 2) # Default to 1 if not specified + a <- ifelse(!is.null(hyper$a), hyper$a, 1) # Default to 1 if not specified + b <- ifelse(!is.null(hyper$b), hyper$b, 2) # Default to 1 if not specified r <- 0 - s <- ifelse(!is.null(params$s), params$s, 1) # Default to 1 if not specified + s <- ifelse(!is.null(hyper$s), hyper$s, 1) # Default to 1 if not specified v <- 1 k <- 1 - - } else if (params$prior_beta == "Hyper-g") { + } else if (params$beta_prior$type == "hyper-g") { a <- 1 b <- 2 r <- 0 s <- 0 v <- 1 k <- 1 - - } else if (params$prior_beta == "Uniform") { + } else if (params$beta_prior$type == "uniform") { a <- 2 b <- 2 r <- 0 s <- 0 v <- 1 k <- 1 - - } else if (params$prior_beta == "Jeffreys") { + } else if (params$beta_prior$type == "Jeffreys") { a <- 0.0001 b <- 2 r <- 0 s <- 0 v <- 1 k <- 1 - } else if (params$prior_beta == "Beta-prime") { + } else if (params$beta_prior$type == "beta.prime") { a <- 1/2 b <- n - p_M - 1.5 r <- 0 s <- 0 v <- 1 k <- 1 - - } else if (params$prior_beta == "Benchmark") { + } else if (params$beta_prior$type == "benchmark") { a <- 0.02 b <- 0.02 * max(n, p_M^2) r <- 0 s <- 0 v <- 1 k <- 1 - - } else if (params$prior_beta == "TruncGamma") { - - a <- 2 * ifelse(!is.null(params$at),params$at, 1) + } else if (params$beta_prior$type == "TG") { + a <- 2 * ifelse(!is.null(hyper$a), hyper$a, 1) b <- 2 r <- 0 - s <- 2 * ifelse(!is.null(params$st),params$st, 1) + s <- 2 * ifelse(!is.null(hyper$s), hyper$s, 1) v <- 1 k <- 1 - - } else if (params$prior_beta == "ZS adapted") { + } else if (params$beta_prior$type == "ZS-adapted") { a <- 1 b <- 2 r <- 0 s <- n + 3 v <- 1 k <- 1 - } else if (params$prior_beta == "Robust") { + } else if (params$beta_prior$type == "robust") { a <- 1 b <- 2 r <- 1.5 s <- 0 v <- (n + 1) / (p_M + 1) k <- 1 - - } else if (params$prior_beta == "Hyper-g/n") { + } else if (params$beta_prior$type == "hyper-g-n") { a <- 1 b <- 2 r <- 1.5 s <- 0 v <- 1 k <- 1 - - } else if (params$prior_beta == "Intrinsic") { + } else if (params$beta_prior$type == "intrinsic") { a <- 1 b <- 1 r <- 1 s <- 0 v <- (n + p_M + 1) / (p_M + 1) k <- (n + p_M + 1) / n - + } else if (params$beta_prior$type == "tCCH") { + a <- hyper$a + b <- hyper$b + r <- hyper$rho + s <- hyper$s + v <- hyper$v + k <- hyper$k } else { - stop("Unknown prior name: ", params$prior_beta) + stop("Unknown prior name: ", params$beta_prior$type) } - - # + if (!is.null(r) & r == 0) { - term1 <- lbeta((a + p_M) / 2, b / 2) - term2 <- phi1(b / 2, (n - 1) / 2, (a + b + p_M) / 2, s / (2 * v), min(0.8,R2_M/(v - (v - 1) * R2_M),log = T)) - - if(R2_M/(v - (v - 1) * R2_M)>0.8) - { + term2 <- phi1(b / 2, (n - 1) / 2, (a + b + p_M) / 2, s / (2 * v), min(0.8, R2_M / (v - (v - 1) * R2_M), log = TRUE)) + + if (R2_M / (v - (v - 1) * R2_M) > 0.8) { warning("Infinite marginal log likelihood! phi1 last argument reduced to 0.8. Use a different prior_beta (Robust, Hyper-g/n, Intrinsic, or g-prior)") } - - term3 <- lbeta(a / 2, b / 2) - term4 <- hypergeometric1F1(b / 2, (a + b) / 2, s / (2 * v),log = T) - marginal_likelihood <- log_likelihood + (term1) + (term2) - (p_M / 2) * log(v) - ((n - 1) / 2)*log(1 - (1 - 1 / v) * R2_M) - (term3) - (term4) + + term3 <- lbeta(a / 2, b / 2) + term4 <- hypergeometric1F1(b / 2, (a + b) / 2, s / (2 * v), log = TRUE) + marginal_likelihood <- log_likelihood + (term1) + (term2) - (p_M / 2) * log(v) - ((n - 1) / 2) * log(1 - (1 - 1 / v) * R2_M) - (term3) - (term4) } else if (!is.null(s) & s == 0) { term1 <- lbeta((a + p_M) / 2, b / 2) - term2 <- hypergeometric2F1(r, b / 2, (a + b) / 2, 1 - k,log = T) + term2 <- hypergeometric2F1(r, b / 2, (a + b) / 2, 1 - k, log = TRUE) term3 <- F1((a + p_M) / 2, (a + b + p_M + 1 - n - 2 * r) / 2, (n - 1) / 2, (a + b + p_M) / 2, 1 - k, 1 - k - (R2_M^2 * k) / ((1 - R2_M) * v)) - marginal_likelihood <- log_likelihood + (a+p_M-2*r)/2*log(k) + (term1) - (term2) - (term3) - (p_M / 2) * log(v) - log(1 - R2_M) * ((n - 1) / 2) - lbeta(a / 2, b / 2) - + marginal_likelihood <- log_likelihood + (a + p_M - 2 * r) / 2 * log(k) + (term1) - (term2) - (term3) - (p_M / 2) * log(v) - log(1 - R2_M) * ((n - 1) / 2) - lbeta(a / 2, b / 2) + } else { stop("Invalid inputs: either r = 0 or s = 0 must be specified.") } - - if (length(params$r) == 0) params$r <- 1/dim(x)[1] # default value or parameter r - + + if (length(params$r) == 0) params$r <- 1 / dim(x)[1] # default value or parameter r + lp <- log_prior(params, complex) + + if (is.nan(marginal_likelihood)) { + return(list(crit = -.Machine$double.xmax + lp, coefs = rep(0,sum(model)))) + } return(list(crit = marginal_likelihood + lp, coefs = fitted_model$coefficients)) } @@ -458,7 +486,7 @@ gaussian_tcch_log_likelihood <- function(y, x, model, complex, params = list(r = #' #' @examples #' logistic.loglik.ala(as.integer(rnorm(100) > 0), matrix(rnorm(100)), TRUE, list(oc = 1)) -#' +#' #' #' @export logistic.loglik.ala logistic.loglik.ala <- function (y, x, model, complex, params = list(r = exp(-0.5))) { @@ -498,7 +526,7 @@ logistic.loglik.alpha <- function (a, data, mu_func) { #' as a string with the alphas as a\[i\]. #' #' @return A numeric with the log likelihood. -#' @examples +#' @examples #'\dontrun{ #'gaussian.loglik.alpha(my_alpha,my_data,my_mu) #'} @@ -509,20 +537,22 @@ gaussian.loglik.alpha <- function (a, data, mu_func) { } -#' Log model prior function +#' Log model prior function #' @param params list of passed parameters of the likelihood in GMJMCMC -#' @param complex list of complexity measures of the features included into the model -#' +#' @param complex list of complexity measures of the features included into the model +#' #' @return A numeric with the log model prior. -#' +#' #' @examples #' log_prior(params = list(r=2), complex = list(oc = 2)) -#' +#' #' @export log_prior log_prior <- function (params, complex) { pl <- log(params$r) * (sum(complex$oc)) return(pl) } + + #' Master Log Marginal Likelihood Function #' #' This function serves as a unified interface to compute the log marginal likelihood @@ -534,167 +564,74 @@ log_prior <- function (params, complex) { #' @param complex A list of complexity measures for the features. #' @param params A list of parameters controlling the model family, prior, and tuning parameters. #' Key elements include: -#' - family: "binomial", "poisson", "gamma", or "gaussian" (default: "gaussian") +#' - family: "binomial", "poisson", "gamma" (all three referred to as GLM below), or "gaussian" (default: "gaussian") #' - prior_beta: Type of prior as a string (default: "g-prior"). Possible values include: -#' - "beta.prime": Beta-prime prior (GLM, requires `n`) -#' - "bic.prior": BIC-based prior (GLM, requires `n`) -#' - "CCH": Chen-Clyde-Hsu prior (GLM, requires `a`, `b`, optionally `s`) -#' - "EB.local": Empirical Bayes local prior (GLM/Gaussian BAS, requires `alpha` for Gaussian) +#' - "beta.prime": Beta-prime prior (GLM/Gaussian, no additional args) +#' - "CH": Compound Hypergeometric prior (GLM/Gaussian, requires `a`, `b`, optionally `s`) +#' - "EB-local": Empirical Bayes local prior (GLM/Gaussian, requires `a` for Gaussian) +#' - "EB-global": Empirical Bayes local prior (Gaussian, requires `a` for Gaussian) #' - "g-prior": Zellner's g-prior (GLM/Gaussian, requires `g`) -#' - "hyper.g": Hyper-g prior (GLM, requires `a`) -#' - "hyper.g.n": Hyper-g/n prior (GLM, requires `a`, `n`) -#' - "tCCH": Truncated Chen-Clyde-Hsu prior (GLM, requires `a`, `b`, optionally `s`, `rho`, `v`, `k`) -#' - "intrinsic": Intrinsic prior (GLM, requires `n`) -#' - "testBF.prior": Test Bayes factor prior (GLM, requires `g`) -#' - "TG": Truncated Gamma prior (GLM, requires `a`) +#' - "hyper-g": Hyper-g prior (GLM/Gaussian, requires `a`) +#' - "hyper-g-n": Hyper-g/n prior (GLM/Gaussian, requires `a`) +#' - "tCCH": Truncated Compound Hypergeometric prior (GLM/Gaussian, requires `a`, `b`, `s`, `rho`, `v`, `k`) +#' - "intrinsic": Intrinsic prior (GLM/Gaussian, no additional args) +#' - "TG": Truncated Gamma prior (GLM/Gamma, requires `a`, `s`) #' - "Jeffreys": Jeffreys prior (GLM/Gaussian, no additional args) -#' - "uniform": Uniform prior (GLM, no additional args) -#' - "CH": Custom Chen-Hsu prior (Gaussian TCCH, requires `a`, `b`, optionally `s`) -#' - "Hyper-g": Hyper-g prior (Gaussian TCCH, no additional args) -#' - "Uniform": Uniform prior (Gaussian TCCH, no additional args) -#' - "Beta-prime": Beta-prime prior (Gaussian TCCH, no additional args) -#' - "Benchmark": Benchmark prior (Gaussian TCCH, no additional args) -#' - "TruncGamma": Truncated Gamma prior (Gaussian TCCH, requires `at`, `st`) -#' - "ZS adapted": Zellner-Siow adapted prior (Gaussian TCCH, no additional args) -#' - "Robust": Robust prior (Gaussian TCCH, no additional args) -#' - "Hyper-g/n": Hyper-g/n prior (Gaussian TCCH, no additional args) -#' - "Intrinsic": Intrinsic prior (Gaussian TCCH, no additional args) -#' - "hyper-g": Hyper-g prior (Gaussian BAS, requires `alpha`) -#' - "BIC": BIC prior (Gaussian BAS, requires `alpha`) -#' - "ZS-null": Zellner-Siow null prior (Gaussian BAS, requires `alpha`) -#' - "ZS-full": Zellner-Siow full prior (Gaussian BAS, requires `alpha`) -#' - "hyper-g-laplace": Hyper-g Laplace prior (Gaussian BAS, requires `alpha`) -#' - "AIC": AIC prior (Gaussian BAS, requires `alpha`) -#' - "JZS": Jeffreys-Zellner-Siow prior (Gaussian BAS, requires `alpha`) -#' - r: Model complexity penalty (default: 1/length(y)) -#' - g: Tuning parameter for g-prior (default: max(n, p^2)) +#' - "uniform": Uniform prior (GLM/Gaussian, no additional args) +#' - "benchmark": Benchmark prior (Gaussian/GLM, no additional args) +#' - "ZS-adapted": Zellner-Siow adapted prior (Gaussian TCCH, no additional args) +#' - "robust": Robust prior (Gaussian/GLM, no additional args) +#' - "Jeffreys-BIC": Jeffreys prior with BIC approximation of marginal likelihood (Gaussian/GLM) +#' - "ZS-null": Zellner-Siow null prior (Gaussian, requires `a`) +#' - "ZS-full": Zellner-Siow full prior (Gaussian, requires `a`) +#' - "hyper-g-laplace": Hyper-g Laplace prior (Gaussian, requires `a`) +#' - "AIC": AIC prior from BAS (Gaussian, requires penalty `a`) +#' - "BIC": BIC prior from BAS (Gaussian/GLM) +#' - "JZS": Jeffreys-Zellner-Siow prior (Gaussian, requires `a`) +#' - r: Model complexity penalty (default: 1/n) +#' - a: Tuning parameter for g-prior (default: max(n, p^2)) #' - a, b, s, v, rho, k: Hyperparameters for various priors -#' - at, st: Additional parameters for TruncGamma prior #' - n: Sample size for some priors (default: length(y)) #' - var: Variance assumption for Gaussian models ("known" or "unknown", default: "unknown") -#' - laplace: Logical for Laplace approximation in GLM (default: FALSE) +#' - laplace: Logical for Laplace approximation in GLM only (default: FALSE) #' #' @return A list with elements: #' \item{crit}{Log marginal likelihood combined with the log prior.} #' \item{coefs}{Posterior mode of the coefficients.} #' #' @examples -#' fbms.mlik.master(rnorm(100), matrix(rnorm(100)), TRUE, list(oc = 1), list(family = "gaussian", prior_beta = "g-prior")) -#' -#' @importFrom BAS beta.prime bic.prior CCH EB.local g.prior hyper.g hyper.g.n tCCH intrinsic testBF.prior TG Jeffreys uniform +#' fbms.mlik.master(y = rnorm(100), +#' x = matrix(rnorm(100)), +#' c(TRUE,TRUE), +#' list(oc = 1), +#' params = list(family = "gaussian", beta_prior = list(type = "g-prior", a = 2), +#' r = exp(-0.5))) +#' +#' @importFrom BAS robust beta.prime bic.prior CCH EB.local g.prior hyper.g hyper.g.n tCCH intrinsic TG Jeffreys uniform #' @export -fbms.mlik.master <- function(y, x, model, complex, params = list(family = "gaussian", prior_beta = "g-prior", r = exp(-0.5))) { +fbms.mlik.master <- function(y, x, model, complex, params = list(family = "gaussian", beta_prior = list(type = "g-prior"), r = exp(-0.5))) { # Extract dimensions n <- length(y) p <- sum(model) - 1 # Number of predictors excluding intercept + params_use <- list() + + if(length(params$r) == 0) + params$r = 1/length(y) - # Set default parameters if not fully specified - if (is.null(params$family)) params$family <- "gaussian" - if (is.null(params$prior_beta)) params$prior_beta <- "g-prior" - if (is.null(params$g)) params$g <- max(p^2, n) - if (is.null(params$r)) params$r <- 1/length(y) - - # Ensure complex has oc if not provided - if (is.null(complex$oc)) complex$oc <- 1 + if(params$family == "gaussian") + params_use$beta_prior <- gen.mlpost.params.lm(params$beta_prior$type, params$beta_prior, p+1, n) + else + { + params_use$beta_prior <- gen.mlpost.params.glm(params$beta_prior$type, params$beta_prior, p+1, n) + params_use$family <- params$family + } - # Homogenize and prepare params for nested calls - params_master <- params - params_nested <- list(r = params_master$r) + - # Define valid priors for each family - glm_priors <- c("beta.prime", "bic.prior", "CCH", "EB.local", "g-prior", "hyper.g", "hyper.g.n", - "tCCH", "intrinsic", "testBF.prior", "TG", "Jeffreys", "uniform") - gaussian_tcch_priors <- c("CH", "Hyper-g", "Uniform", "Beta-prime", "Benchmark", "TruncGamma", - "ZS adapted", "Robust", "Hyper-g/n", "Intrinsic") - gaussian_bas_priors <- c("g-prior", "hyper-g", "EB.local", "BIC", "ZS-null", "ZS-full", - "hyper-g-laplace", "AIC", "JZS") + loglik.pi <- select.mlpost.fun(params$beta_prior$type, params$family) - # Decision logic based on family and prior_beta - if (params_master$family %in% c("binomial", "poisson", "gamma")) { - if (!params_master$prior_beta %in% glm_priors) { - stop(sprintf("Prior '%s' is not supported for family '%s'. Supported GLM priors: %s", - params_master$prior_beta, params_master$family, paste(glm_priors, collapse = ", "))) - } - - params_nested$family <- params_master$family - if (is.null(params_master$laplace)) params_nested$laplace <- FALSE else params_nested$laplace <- params_master$laplace - - if (params_master$prior_beta == "Jeffreys" && params_master$family == "binomial") { - # Use logistic.loglik for binomial with Jeffreys prior and BIC approximation - result <- logistic.loglik(y, x, model, complex, params_nested) - } else { - # Use glm.logpost.bas for binomial, poisson, or gamma with BAS priors - params_nested$prior_beta <- switch( - params_master$prior_beta, - "beta.prime" = beta.prime(n = if (is.null(params_master$n)) n else params_master$n), - "bic.prior" = bic.prior(n = if (is.null(params_master$n)) n else params_master$n), - "CCH" = CCH(alpha = if (is.null(params_master$a)) 1 else params_master$a, - beta = if (is.null(params_master$b)) 2 else params_master$b, - s = if (is.null(params_master$s)) 0 else params_master$s), - "EB.local" = EB.local(), - "g-prior" = g.prior(g = if (is.null(params_master$g)) max(n, p + 1) else params_master$g), - "hyper.g" = hyper.g(alpha = if (is.null(params_master$a)) 3 else params_master$a), - "hyper.g.n" = hyper.g.n(alpha = if (is.null(params_master$a)) 3 else params_master$a, - n = if (is.null(params_master$n)) n else params_master$n), - "tCCH" = tCCH(alpha = if (is.null(params_master$a)) 1 else params_master$a, - beta = if (is.null(params_master$b)) 2 else params_master$b, - s = if (is.null(params_master$s)) 0 else params_master$s, - r = if (is.null(params_master$rho)) 3/2 else params_master$rho, - v = if (is.null(params_master$v)) 1 else params_master$v, - theta = if (is.null(params_master$k)) 1 else params_master$k), - "intrinsic" = intrinsic(n = if (is.null(params_master$n)) n else params_master$n), - "testBF.prior" = testBF.prior(g = if (is.null(params_master$g)) max(n, p + 1) else params_master$g), - "TG" = TG(alpha = if (is.null(params_master$a)) 2 else params_master$a), - "Jeffreys" = Jeffreys(), - "uniform" = uniform(), - stop("Unrecognized prior_beta for GLM: ", params_master$prior_beta) - ) - result <- glm.logpost.bas(y, x, model, complex, params_nested) - } - } else if (params_master$family == "gaussian") { - if (!(params_master$prior_beta %in% c(glm_priors, gaussian_tcch_priors, gaussian_bas_priors))) { - stop(sprintf("Prior '%s' is not supported for family 'gaussian'. Supported priors: %s, %s, %s", - params_master$prior_beta, paste(glm_priors, collapse = ", "), - paste(gaussian_tcch_priors, collapse = ", "), paste(gaussian_bas_priors, collapse = ", "))) - } - - params_nested$r <- params_master$r - - if (params_master$prior_beta == "g-prior" && is.null(params_master$method.num)) { - # Use gaussian.loglik.g for Zellner's g-prior - if (is.null(params_master$g)) params_nested$g <- max(n, p^2) else params_nested$g <- params_master$g - result <- gaussian.loglik.g(y, x, model, complex, params_nested) - } else if (params_master$prior_beta == "Jeffreys" && is.null(params_master$method.num)) { - # Use gaussian.loglik for Jeffreys prior with BIC approximation - if (is.null(params_master$var)) params_nested$var <- "unknown" else params_nested$var <- params_master$var - result <- gaussian.loglik(y, x, model, complex, params_nested) - } else if (params_master$prior_beta %in% gaussian_tcch_priors) { - # Use gaussian_tcch_log_likelihood for TCCH priors - params_nested$prior_beta <- params_master$prior_beta - if (!is.null(params_master$a)) params_nested$a <- params_master$a - if (!is.null(params_master$b)) params_nested$b <- params_master$b - if (!is.null(params_master$s)) params_nested$s <- params_master$s - if (!is.null(params_master$v)) params_nested$v <- params_master$v - if (!is.null(params_master$k)) params_nested$k <- params_master$k - if (!is.null(params_master$at)) params_nested$at <- params_master$at - if (!is.null(params_master$st)) params_nested$st <- params_master$st - result <- gaussian_tcch_log_likelihood(y, x, model, complex, params_nested) - } else if (params_master$prior_beta %in% gaussian_bas_priors) { - # Use lm.logpost.bas for BAS priors - params_nested$prior_beta <- params_master$prior_beta - if (is.null(params_master$alpha)) params_nested$alpha <- max(n, (p + 1)^2) else params_nested$alpha <- params_master$alpha - result <- lm.logpost.bas(y, x, model, complex, params_nested) - } else { - stop(sprintf("Prior '%s' is not supported for Gaussian family in this context.", params_master$prior_beta)) - } - } else { - stop("Unsupported family: ", params_master$family, ". Supported families are 'binomial', 'poisson', 'gamma', or 'gaussian'.") - } - - # Ensure consistent return structure - if (is.null(result$crit) || is.null(result$coefs)) { - stop("Error in computation: Returned result does not contain 'crit' and 'coefs'.") - } + result <- loglik.pi(y,x,model,complex,params_use) + return(list(crit = result$crit, coefs = result$coefs)) } \ No newline at end of file diff --git a/R/likelihoods2.R b/R/likelihoods2.R new file mode 100644 index 0000000000000000000000000000000000000000..0b4d7b90a4590695862e981b7d626ca2416c4c20 --- /dev/null +++ b/R/likelihoods2.R @@ -0,0 +1,800 @@ +# Title : Log likelihood functions +# Objective : Log likelihood functions with priors to be used as templates or directly in GMJMCMC +# Created by: jonlachmann +# Created on: 2021-02-24 + +#' Log likelihood function for glm regression with parameter priors from BAS package +#' This function is created as an example of how to create an estimator that is used +#' to calculate the marginal likelihood of a model. +#' +#' @param y A vector containing the dependent variable +#' @param x The matrix containing the precalculated features +#' @param model The model to estimate as a logical vector +#' @param complex A list of complexity measures for the features +#' @param params A list of parameters for the log likelihood, supplied by the user, important to specify the tuning parameters of beta priors and family that BAS uses in glm models +#' +#' @return A list with the log marginal likelihood combined with the log prior (crit) and the posterior mode of the coefficients (coefs). +#' +#' @examples +#' glm.logpost.bas(as.integer(rnorm(100) > 0),cbind(1,matrix(rnorm(100))),c(TRUE,TRUE),list(oc = 1)) +#' +#' @importFrom BAS uniform Jeffreys g.prior +#' @importFrom stats poisson Gamma glm.control +#' @export glm.logpost.bas +glm.logpost.bas2 <- function (y, x, model, complex, params = list(r = exp(-0.5), family = "binomial", prior_beta = Jeffreys(), laplace = FALSE)) { + if (length(params) == 0) + params <- list(r = 1/dim(x)[1], family = "binomial", prior_beta = g.prior(max(dim(x)[1],sum(model)-1)), laplace = FALSE) + p <- sum(model) - 1 + if(p==0) + { + probinit <- as.numeric(c(1,0.99)) + model[2] <- T + }else{ + probinit <- as.numeric(c(1,rep(0.99,p))) + } + + mod<-NULL + + + tryCatch({ + if(params$family == "binomial") + suppressWarnings({ + mod <- .Call(BAS:::C_glm_deterministic, + y = as.numeric(y), X = as.matrix(x[,model]), + Roffset = as.numeric(rep(0, length(y))), + Rweights = as.numeric(rep(1, length(y))), + Rprobinit = probinit, + Rmodeldim = as.integer(rep(0,ifelse(p==0,2,1))), + modelprior = uniform(), + betaprior = params$prior_beta, + family = binomial(), + Rcontrol = glm.control(), + Rlaplace = as.integer(params$laplace)) + }) + else if(params$family == "poisson") + suppressWarnings({ + mod <- .Call(BAS:::C_glm_deterministic, + y = as.numeric(y), X = as.matrix(x[,model]), + Roffset = as.numeric(rep(0, length(y))), + Rweights = as.numeric(rep(1, length(y))), + Rprobinit = probinit, + Rmodeldim = as.integer(rep(0,ifelse(p==0,2,1))), + modelprior = uniform(), + betaprior = params$prior_beta, + family = poisson(), + Rcontrol = glm.control(), + Rlaplace = as.integer(params$laplace)) + }) + else{ + suppressWarnings({ + mod <- .Call(BAS:::C_glm_deterministic, + y = as.numeric(y), X = as.matrix(x[,model]), + Roffset = as.numeric(rep(0, length(y))), + Rweights = as.numeric(rep(1, length(y))), + Rprobinit = probinit, + Rmodeldim = as.integer(rep(0,ifelse(p==0,2,1))), + modelprior = uniform(), + betaprior = params$prior_beta, + family = Gamma(), + Rcontrol = glm.control(), + Rlaplace = as.integer(params$laplace)) + })} + }, error = function(e) { + # Handle the error by setting result to NULL + mod <- NULL + # You can also print a message or log the error if needed + cat("An error occurred:", conditionMessage(e), "\n") + }) + + if(length(mod)==0) { + return(list(crit = -.Machine$double.xmax + log(params$r * sum(complex$oc)),coefs = rep(0,p+1))) + } + + if(p == 0) + { + ret <- mod$logmarg[2] + log(params$r) * sum(complex$oc) + return(list(crit=ret, coefs=mod$mle[[2]])) + } + ret <- mod$logmarg + log(params$r) * sum(complex$oc) + return(list(crit=ret, coefs=mod$mle[[1]])) +} + + +#' Log likelihood function for Gaussian regression with parameter priors from BAS package +#' This function is created as an example of how to create an estimator that is used +#' to calculate the marginal likelihood of a model. +#' +#' @param y A vector containing the dependent variable +#' @param x The matrix containing the precalculated features +#' @param model The model to estimate as a logical vector +#' @param complex A list of complexity measures for the features +#' @param params A list of parameters for the log likelihood, supplied by the user, important to specify the tuning parameters of beta priors where the corresponding integers as prior_beta must be provided "g-prior" = 0, "hyper-g" = 1, "EB-local" = 2, "BIC" = 3, "ZS-null" = 4, "ZS-full" = 5, "hyper-g-laplace" = 6, "AIC" = 7, "EB-global" = 2, "hyper-g-n" = 8, "JZS" = 9 and in Gaussian models +#' +#' @return A list with the log marginal likelihood combined with the log prior (crit) and the posterior mode of the coefficients (coefs). +#' +#' @examples +#' lm.logpost.bas(rnorm(100), cbind(1,matrix(rnorm(100))), c(TRUE,TRUE), list(oc = 1)) +#' +#' +#' @export lm.logpost.bas +lm.logpost.bas2 <- function (y, x, model, complex, params = list(r = exp(-0.5),prior_beta = "g-prior",alpha = 4)) { + if (length(params) == 0) + params <- list(r = 1/dim(x)[1], prior_beta = 0,alpha = max(dim(x)[1],sum(model)^2)) + + + p <- sum(model) - 1 + if(p==0) + { + probinit <- as.numeric(c(1,0.99)) + model[2] <- T + }else{ + probinit <- as.numeric(c(1,rep(0.99,p))) + } + + mod<-NULL + + tryCatch({ + suppressWarnings({ + mod <- .Call(BAS:::C_deterministic, + y = y, X = as.matrix(x[,model]), + as.numeric(rep(1, length(y))), + probinit, + as.integer(rep(0,ifelse(p==0,2,1))), + incint = as.integer(F), + alpha = ifelse(length(params$alpha)>0,as.numeric(params$alpha),NULL), + method = as.integer(params$prior_beta), + modelprior = uniform(), + Rpivot = TRUE, + Rtol = 1e-7) + }) + }, error = function(e) { + # Handle the error by setting result to NULL + mod <- NULL + # You can also print a message or log the error if needed + cat("An error occurred:", conditionMessage(e), "\n") + }) + + if(length(mod)==0) { + return(list(crit = -.Machine$double.xmax + log(params$r * sum(complex$oc)),coefs = rep(0,p+1))) + } + + if(p == 0) + { + ret <- mod$logmarg[2] + log(params$r) * sum(complex$oc) + return(list(crit=ret, coefs=mod$mle[[2]])) + } + ret <- mod$logmarg + log(params$r) * sum(complex$oc) + return(list(crit=ret, coefs=mod$mle[[1]])) +} + + +#' Log likelihood function for logistic regression with a Jeffreys parameter prior and BIC approximations of the posterior +#' This function is created as an example of how to create an estimator that is used +#' to calculate the marginal likelihood of a model. +#' +#' @param y A vector containing the dependent variable +#' @param x The matrix containing the precalculated features +#' @param model The model to estimate as a logical vector +#' @param complex A list of complexity measures for the features +#' @param params A list of parameters for the log likelihood, supplied by the user +#' +#' @return A list with the log marginal likelihood combined with the log prior (crit) and the posterior mode of the coefficients (coefs). +#' +#' @examples +#' logistic.loglik2(as.integer(rnorm(100) > 0), matrix(rnorm(100)), TRUE, list(oc = 1)) +#' +#' +#' @export logistic.loglik2 +logistic.loglik2 <- function (y, x, model, complex, params = list(r = exp(-0.5))) { + if (length(params) == 0) + params <- list(r = 1/dim(x)[1]) + suppressWarnings({mod <- fastglm(as.matrix(x[, model]), y, family = binomial())}) + ret <- (-(mod$deviance + log(length(y)) * (mod$rank - 1) - 2 * log(params$r) * sum(complex$oc))) / 2 + return(list(crit=ret, coefs=mod$coefficients)) +} + +#' Log likelihood function for glm regression with a Jeffreys parameter prior and BIC approximations of the posterior +#' This function is created as an example of how to create an estimator that is used +#' to calculate the marginal likelihood of a model. +#' +#' @param y A vector containing the dependent variable +#' @param x The matrix containing the precalculated features +#' @param model The model to estimate as a logical vector +#' @param complex A list of complexity measures for the features +#' @param params A list of parameters for the log likelihood, supplied by the user, family must be specified +#' +#' @return A list with the log marginal likelihood combined with the log prior (crit) and the posterior mode of the coefficients (coefs). +#' +#' @examples +#' glm.loglik(abs(rnorm(100))+1, matrix(rnorm(100)), TRUE, list(oc = 1)) +#' +#' +#' @export glm.loglik +glm.loglik2 <- function (y, x, model, complex, params = list(r = exp(-0.5),family = "Gamma")) { + if (length(params) == 0) + params <- list(r = 1/dim(x)[1]) + + if(params$family == "binomial") + { + fam = binomial() + }else if(params$family == "poisson"){ + fam = poisson() + }else + { + fam = Gamma() + } + + suppressWarnings({mod <- fastglm(as.matrix(x[, model]), y, family = fam)}) + + if (length(mod) == 0 || is.nan(mod$deviance)) { + return(list(crit = -.Machine$double.xmax + log_prior(params, complex), coefs = rep(0,sum(model)))) + } + + ret <- (-(mod$deviance + log(length(y)) * (mod$rank - 1) - 2 * log(params$r) * sum(complex$oc))) / 2 + return(list(crit=ret, coefs=mod$coefficients)) +} + + +#' Log likelihood function for gaussian regression with a Jeffreys prior and BIC approximation of MLIK with both known and unknown variance of the responses +#' +#' @param y A vector containing the dependent variable +#' @param x The matrix containing the precalculated features +#' @param model The model to estimate as a logical vector +#' @param complex A list of complexity measures for the features +#' @param params A list of parameters for the log likelihood, supplied by the user +#' +#' @return A list with the log marginal likelihood combined with the log prior (crit) and the posterior mode of the coefficients (coefs). +#' +#' @examples +#' gaussian.loglik(rnorm(100), matrix(rnorm(100)), TRUE, list(oc = 1), NULL) +#' +#' +#' @export gaussian.loglik +gaussian.loglik2 <- function (y, x, model, complex, params) { + if(length(params)==0) + params <- list() + if (length(params$r) == 0) + params$r <- 1/dim(x)[1] + if(length(params$var) == 0) + params$var <- 1 + suppressWarnings({mod <- fastglm(as.matrix(x[, model]), y, family = gaussian())}) + + if(params$var == "unknown") + ret <- (-(mod$aic + (log(length(y))-2) * (mod$rank) - 2 * log(params$r) * (sum(complex$oc)))) / 2 + else + ret <- (-(mod$deviance/params$var + log(length(y)) * (mod$rank - 1) - 2 * log_prior(params, complex))) / 2 + + return(list(crit=ret, coefs=mod$coefficients)) +} + + +#' Log likelihood function for linear regression using Zellners g-prior +#' +#' @param y A vector containing the dependent variable +#' @param x The matrix containing the precalculated features +#' @param model The model to estimate as a logical vector +#' @param complex A list of complexity measures for the features +#' @param params A list of parameters for the log likelihood, supplied by the user +#' +#' @return A list with the log marginal likelihood combined with the log prior (crit) and the posterior mode of the coefficients (coefs). +#' +#' @examples +#' gaussian.loglik2.g(rnorm(100), matrix(rnorm(100)), TRUE, list(oc=1)) +#' +#' @export gaussian.loglik2.g +gaussian.loglik2.g <- function (y, x, model, complex, params = NULL) +{ + if(length(params)==0) + params <- list() + if (length(params$r) == 0) + params$r <- 1/dim(x)[1] + suppressWarnings({ + mod <- fastglm(as.matrix(x[, model]), y, family = gaussian()) + }) + # Calculate R-squared + y_mean <- mean(y) + TSS <- sum((y - y_mean)^2) + RSS <- sum(mod$residuals^2) + Rsquare <- 1 - (RSS / TSS) + + if (length(params$r) == 0 || length(params$g) == 0) + { + params$r <- 1/dim(x)[1] + params$g <- max(mod$rank^2,length(y)) + } + + # logarithm of marginal likelihood + mloglik <- 0.5*(log(1.0 + params$g) * (dim(x)[1] - mod$rank) - log(1.0 + params$g * (1.0 - Rsquare)) * (dim(x)[1] - 1))*(mod$rank!=1) + + # logarithm of model prior + # default value or parameter r + lp <- log_prior(params, complex) + + return(list(crit = mloglik + lp, coefs = mod$coefficients)) +} + + +#' Log likelihood function for Gaussian regression with parameter priors from BAS package +#' +#' This function computes the marginal likelihood of a Gaussian regression model under different priors. +#' +#' @param y A numeric vector containing the dependent variable. +#' @param x A matrix containing the independent variables, including an intercept column. +#' @param model A logical vector indicating which variables to include in the model. +#' @param complex A list containing complexity measures for the features. +#' @param params A list of parameters for the log likelihood, specifying the tuning parameters of beta priors. +#' +#' @return A list with elements: +#' \item{crit}{Log marginal likelihood combined with the log prior.} +#' \item{coefs}{Posterior mode of the coefficients.} +#' +#' @examples +#' gaussian_tcch_log_likelihood2(rnorm(100), matrix(rnorm(100)), TRUE, list(oc=1)) +#' +#' @importFrom BAS phi1 hypergeometric1F1 hypergeometric2F1 +#' @importFrom tolerance F1 +#' @export +gaussian_tcch_log_likelihood2 <- function(y, x, model, complex, params = list(r = exp(-0.5), prior_beta = "intrinsic")) { + + # Fit the linear model using fastglm + fitted_model <- fastglm(as.matrix(x[, model]), y, family = gaussian()) + log_likelihood <- -(fitted_model$aic -2 * (fitted_model$rank))/2 + # Compute R-squared manually + y_mean <- mean(y) + TSS <- sum((y - y_mean)^2) + RSS <- sum(fitted_model$residuals^2) + R2_M <- 1 - (RSS / TSS) + + p_M <- fitted_model$rank + n <- length(y) + + # Switch-like structure to assign hyperparameters based on prior + if (params$prior_beta[[1]] == "CH") { + # CH prior: b and s should be user-specified, with defaults if not provided + a <- ifelse(!is.null(params$prior_beta$a),params$prior_beta$a, 1) # Default to 1 if not specified + b <- ifelse(!is.null(params$prior_beta$b),params$prior_beta$b, 2) # Default to 1 if not specified + r <- 0 + s <- ifelse(!is.null(params$prior_beta$s), params$prior_beta$s, 1) # Default to 1 if not specified + v <- 1 + k <- 1 + + } else if (params$prior_beta[[1]] == "hyper-g") { + a <- 1 + b <- 2 + r <- 0 + s <- 0 + v <- 1 + k <- 1 + + } else if (params$prior_beta[[1]] == "uniform") { + a <- 2 + b <- 2 + r <- 0 + s <- 0 + v <- 1 + k <- 1 + + } else if (params$prior_beta[[1]] == "Jeffreys") { + a <- 0.0001 + b <- 2 + r <- 0 + s <- 0 + v <- 1 + k <- 1 + } else if (params$prior_beta[[1]] == "beta.prime") { + a <- 1/2 + b <- n - p_M - 1.5 + r <- 0 + s <- 0 + v <- 1 + k <- 1 + + } else if (params$prior_beta[[1]] == "benchmark") { + a <- 0.02 + b <- 0.02 * max(n, p_M^2) + r <- 0 + s <- 0 + v <- 1 + k <- 1 + + } else if (params$prior_beta[[1]] == "TG") { + + a <- 2 * ifelse(!is.null(params$prior_beta$a),params$prior_beta$a, 1) + b <- 2 + r <- 0 + s <- 2 * ifelse(!is.null(params$prior_beta$s),params$prior_beta$s, 1) + v <- 1 + k <- 1 + + } else if (params$prior_beta[[1]] == "ZS-adapted") { + a <- 1 + b <- 2 + r <- 0 + s <- n + 3 + v <- 1 + k <- 1 + } else if (params$prior_beta[[1]] == "robust") { + a <- 1 + b <- 2 + r <- 1.5 + s <- 0 + v <- (n + 1) / (p_M + 1) + k <- 1 + + } else if (params$prior_beta[[1]] == "hyper-g-n") { + a <- 1 + b <- 2 + r <- 1.5 + s <- 0 + v <- 1 + k <- 1 + + } else if (params$prior_beta[[1]] == "intrinsic") { + a <- 1 + b <- 1 + r <- 1 + s <- 0 + v <- (n + p_M + 1) / (p_M + 1) + k <- (n + p_M + 1) / n + + }else if (params$prior_beta[[1]] == "tCCH") { + a <- params$prior_beta$a + b <- params$prior_beta$b + r <- params$prior_beta$rho + s <- params$prior_beta$s + v <- params$prior_beta$v + k <- params$prior_beta$k + }else { + stop("Unknown prior name: ", params$prior_beta) + } + + # + if (!is.null(r) & r == 0) { + + term1 <- lbeta((a + p_M) / 2, b / 2) + term2 <- phi1(b / 2, (n - 1) / 2, (a + b + p_M) / 2, s / (2 * v), min(0.8,R2_M/(v - (v - 1) * R2_M),log = T)) + + if(R2_M/(v - (v - 1) * R2_M)>0.8) + { + warning("Infinite marginal log likelihood! phi1 last argument reduced to 0.8. Use a different prior_beta (Robust, Hyper-g/n, Intrinsic, or g-prior)") + } + + term3 <- lbeta(a / 2, b / 2) + term4 <- hypergeometric1F1(b / 2, (a + b) / 2, s / (2 * v),log = T) + marginal_likelihood <- log_likelihood + (term1) + (term2) - (p_M / 2) * log(v) - ((n - 1) / 2)*log(1 - (1 - 1 / v) * R2_M) - (term3) - (term4) + } else if (!is.null(s) & s == 0) { + term1 <- lbeta((a + p_M) / 2, b / 2) + term2 <- hypergeometric2F1(r, b / 2, (a + b) / 2, 1 - k,log = T) + term3 <- F1((a + p_M) / 2, (a + b + p_M + 1 - n - 2 * r) / 2, (n - 1) / 2, (a + b + p_M) / 2, 1 - k, 1 - k - (R2_M^2 * k) / ((1 - R2_M) * v)) + marginal_likelihood <- log_likelihood + (a+p_M-2*r)/2*log(k) + (term1) - (term2) - (term3) - (p_M / 2) * log(v) - log(1 - R2_M) * ((n - 1) / 2) - lbeta(a / 2, b / 2) + + } else { + stop("Invalid inputs: either r = 0 or s = 0 must be specified.") + } + + if (length(params$r) == 0) params$r <- 1/dim(x)[1] # default value or parameter r + + lp <- log_prior(params, complex) + + return(list(crit = marginal_likelihood + lp, coefs = fitted_model$coefficients)) +} + + + +#' Log likelihood function for logistic regression with an approximate Laplace approximations used +#' This function is created as an example of how to create an estimator that is used +#' to calculate the marginal likelihood of a model. +#' +#' @param y A vector containing the dependent variable +#' @param x The matrix containing the precalculated features +#' @param model The model to estimate as a logical vector +#' @param complex A list of complexity measures for the features +#' @param params A list of parameters for the log likelihood, supplied by the user +#' +#' @return A list with the log marginal likelihood combined with the log prior (crit) and the posterior mode of the coefficients (coefs). +#' +#' @examples +#' logistic.loglik2.ala(as.integer(rnorm(100) > 0), matrix(rnorm(100)), TRUE, list(oc = 1)) +#' +#' +#' @export logistic.loglik2.ala +logistic.loglik2.ala <- function (y, x, model, complex, params = list(r = exp(-0.5))) { + if (length(params) == 0) + params <- list(r = 1/dim(x)[1]) + suppressWarnings({mod <- fastglm(as.matrix(x[, model]), y, family = binomial(),maxit = 1)}) + ret <- (-(mod$deviance + log(length(y)) * (mod$rank - 1) -2 * log(params$r) * sum(complex$oc))) / 2 + return(list(crit=ret, coefs=mod$coefficients)) +} + + + +#' Log likelihood function for logistic regression for alpha calculation +#' This function is just the bare likelihood function +#' +#' @param a A vector of the alphas to be used +#' @param data The data to be used for calculation +#' @param mu_func The function linking the mean to the covariates, +#' as a string with the alphas as a\[i\]. +#' +#' @return A numeric with the log likelihood. +#' +#' @export logistic.loglik2.alpha +logistic.loglik2.alpha <- function (a, data, mu_func) { + m <- 1 / (1 + exp(-eval(parse(text = mu_func)))) + -sum((data[,1] * log(m) + (1 - data[, 1]) * log(1 - m))) +} + + +#' Log likelihood function for gaussian regression for alpha calculation +#' This function is just the bare likelihood function +#' Note that it only gives a proportional value and is equivalent to least squares +#' +#' @param a A vector of the alphas to be used +#' @param data The data to be used for calculation +#' @param mu_func The function linking the mean to the covariates, +#' as a string with the alphas as a\[i\]. +#' +#' @return A numeric with the log likelihood. +#' @examples +#'\dontrun{ +#'gaussian.loglik2.alpha(my_alpha,my_data,my_mu) +#'} +#' @export gaussian.loglik2.alpha +gaussian.loglik2.alpha <- function (a, data, mu_func) { + m <- eval(parse(text=mu_func)) + sum((data[,1]-m)^2) +} + + +#' Log model prior function +#' @param params list of passed parameters of the likelihood in GMJMCMC +#' @param complex list of complexity measures of the features included into the model +#' +#' @return A numeric with the log model prior. +#' +#' @examples +#' log_prior(params = list(r=2), complex = list(oc = 2)) +#' +#' @export log_prior +log_prior <- function (params, complex) { + pl <- log(params$r) * (sum(complex$oc)) + return(pl) +} + + +#' Master Log Marginal Likelihood Function +#' +#' This function serves as a unified interface to compute the log marginal likelihood +#' for different regression models and priors by calling specific log likelihood functions. +#' +#' @param y A numeric vector containing the dependent variable. +#' @param x A matrix containing the precalculated features (independent variables). +#' @param model A logical vector indicating which variables to include in the model. +#' @param complex A list of complexity measures for the features. +#' @param params A list of parameters controlling the model family, prior, and tuning parameters. +#' Key elements include: +#' - family: "binomial", "poisson", "gamma" (all three referred to as GLM below), or "gaussian" (default: "gaussian") +#' - prior_beta: Type of prior as a string (default: "g-prior"). Possible values include: +#' - "beta.prime": Beta-prime prior (GLM/Gaussian, no additional args) +#' - "CH": Compound Hypergeometric prior (GLM/Gaussian, requires `a`, `b`, optionally `s`) +#' - "EB-local": Empirical Bayes local prior (GLM/Gaussian, requires `a` for Gaussian) +#' - "EB-global": Empirical Bayes local prior (Gaussian, requires `a` for Gaussian) +#' - "g-prior": Zellner's g-prior (GLM/Gaussian, requires `g`) +#' - "hyper-g": Hyper-g prior (GLM/Gaussian, requires `a`) +#' - "hyper-g-n": Hyper-g/n prior (GLM/Gaussian, requires `a`) +#' - "tCCH": Truncated Compound Hypergeometric prior (GLM/Gaussian, requires `a`, `b`, `s`, `rho`, `v`, `k`) +#' - "intrinsic": Intrinsic prior (GLM/Gaussian, no additional args) +#' - "TG": Truncated Gamma prior (GLM/Gamma, requires `a`, `s`) +#' - "Jeffreys": Jeffreys prior (GLM/Gaussian, no additional args) +#' - "uniform": Uniform prior (GLM/Gaussian, no additional args) +#' - "benchmark": Benchmark prior (Gaussian/GLM, no additional args) +#' - "ZS-adapted": Zellner-Siow adapted prior (Gaussian TCCH, no additional args) +#' - "robust": Robust prior (Gaussian/GLM, no additional args) +#' - "Jeffreys-BIC": Jeffreys prior with BIC approximation of marginal likelihood (Gaussian/GLM) +#' - "ZS-null": Zellner-Siow null prior (Gaussian, requires `a`) +#' - "ZS-full": Zellner-Siow full prior (Gaussian, requires `a`) +#' - "hyper-g-laplace": Hyper-g Laplace prior (Gaussian, requires `a`) +#' - "AIC": AIC prior from BAS (Gaussian, requires penalty `a`) +#' - "BIC": BIC prior from BAS (Gaussian/GLM) +#' - "JZS": Jeffreys-Zellner-Siow prior (Gaussian, requires `a`) +#' - r: Model complexity penalty (default: 1/n) +#' - g: Tuning parameter for g-prior (default: max(n, p^2)) +#' - a, b, s, v, rho, k: Hyperparameters for various priors +#' - n: Sample size for some priors (default: length(y)) +#' - var: Variance assumption for Gaussian models ("known" or "unknown", default: "unknown") +#' - laplace: Logical for Laplace approximation in GLM only (default: FALSE) +#' +#' @return A list with elements: +#' \item{crit}{Log marginal likelihood combined with the log prior.} +#' \item{coefs}{Posterior mode of the coefficients.} +#' +#' @examples +#' fbms.mlik.master(rnorm(100), matrix(rnorm(100)), c(TRUE,TRUE), list(oc = 1)) +#' +#' @importFrom BAS robust beta.prime bic.prior CCH EB.local g.prior hyper.g hyper.g.n tCCH intrinsic TG Jeffreys uniform +#' @export +fbms.mlik.master_old <- function(y, x, model, complex, params = list(family = "gaussian", prior_beta = "g-prior", r = exp(-0.5))) { + # Extract dimensions + n <- length(y) + p <- sum(model) - 1 # Number of predictors excluding intercept + + # Set default parameters if not fully specified + if (is.null(params$family)) params$family <- "gaussian" + if (is.null(params$prior_beta)) params$prior_beta <- "g-prior" + if (is.null(params$g)) params$g <- max(p^2, n) + if (is.null(params$n)) params$n <- n + if (is.null(params$r)) params$r <- 1/n + + # Ensure complex has oc if not provided, ignore by default + if (is.null(complex$oc)) complex$oc <- 0 + + # Homogenize and prepare params for nested calls + params_master <- params + params_nested <- list(r = params_master$r) + + # Define valid priors for each family + #glm_only_priors <- c("CCH", "tCCH", "TG") + glm_and_gaussian_priors <- c("CH", "tCCH", "TG","beta.prime", "EB-local", "g-prior", "hyper-g", "hyper-g-n", + "intrinsic", "ZS-adapted", "Jeffreys", "uniform", "benchmark", "robust", "Jeffreys-BIC") + gaussian_only_priors <- c("ZS-null", "ZS-full", "hyper-g-laplace","BIC", "AIC", "JZS","EB-global") + + #review a bit + gaussian_not_robust <- c("CH", "tCCH", "ZS-adapted", "TG","beta.prime", "benchmark","Jeffreys") + gaussian_robust <- c("g-prior", "hyper-g", "EB-local","BIC", "Jeffreys-BIC", "ZS-null", "ZS-full", "hyper-g-laplace", + "AIC", "hyper-g-n", "JZS") + gaussian_tcch <- c("CH", "tCCH", "TG","beta.prime", "intrinsic", "ZS-adapted", "uniform","Jeffreys", "benchmark", "robust") + gaussian_bas <- c("g-prior", "hyper-g", "EB-local","ZS-null", "ZS-full", "BIC", "hyper-g-laplace", "AIC", "EB-global", "hyper-g-n", "JZS") + + all_priors <- c(glm_and_gaussian_priors, gaussian_only_priors) + #browser() + # Validate prior_beta + if (!params_master$prior_beta %in% all_priors) { + stop(sprintf("Prior '%s' is not supported. Supported priors: %s", + params_master$prior_beta, paste(all_priors, collapse = ", "))) + } + + # Decision logic based on family and prior_beta + if (params_master$family %in% c("binomial", "poisson", "gamma")) { + if (params_master$prior_beta %in% gaussian_only_priors) { + stop(sprintf("Prior '%s' is not supported for GLM family '%s'. Supported GLM priors: %s", + params_master$prior_beta, params_master$family, + paste(c(glm_and_gaussian_priors), collapse = ", "))) + } + + params_nested$family <- params_master$family + if (is.null(params_master$laplace)) params_nested$laplace <- FALSE else params_nested$laplace <- params_master$laplace + + #if(params_nested$laplace) + # print("Laplace approximations will be used") + + if (params_master$prior_beta == "Jeffreys-BIC") { + if(params_nested$family == "binomial") + result <- logistic.loglik2(y, x, model, complex, params_nested) + else if(params_nested$family%in% c("poisson", "gamma")) + result <- glm.loglik2(y, x, model, complex, params_nested) + + } else { + params_nested$prior_beta <- switch( + params_master$prior_beta, + "beta.prime" = beta.prime(n = n), + "CH" = CCH(alpha = if (is.null(params_master$a)) stop("a must be provided") else params_master$a, + beta = if (is.null(params_master$b)) stop("b must be provided") else params_master$b, + s = if (is.null(params_master$s)) stop("s must be provided") else params_master$s), + "EB-local" = EB.local(), + "g-prior" = g.prior(g = params_master$g), + "hyper-g" = hyper.g(alpha = if (is.null(params_master$a)) stop("a must be provided") else params_master$a), + "tCCH" = tCCH(alpha = if (is.null(params_master$a)) stop("a must be provided") else params_master$a, + beta = if (is.null(params_master$b)) stop("b must be provided") else params_master$b, + s = if (is.null(params_master$s)) stop("s must be provided") else params_master$s, + r = if (is.null(params_master$rho)) stop("rho must be provided") else params_master$rho, + v = if (is.null(params_master$v)) stop("v must be provided") else params_master$v, + theta = if (is.null(params_master$k)) stop("k must be provided") else params_master$k), + "intrinsic" = intrinsic(n = params_master$n), + "TG" = TG(alpha = if (is.null(params_master$a)) stop("a must be provided") else params_master$a), + "Jeffreys" = Jeffreys(), + "uniform" = tCCH(alpha = 2, + beta = 2, + s = 0, + r = 0, + v = 1, + theta = 1), + "benchmark" = tCCH(alpha = 0.02, + beta = 0.02*max(n,p^2), + s = 0, + r = 0, + v = 1, + theta = 1), + "ZS-adapted" = tCCH(alpha = 1, + beta = 2, + s = n + 3, + r = 0, + v = 1, + theta = 1), + "TG" = TG(alpha = if (is.null(params_master$a)) stop("a must be provided") else params_master$a), + "robust" = robust(n = if (is.null(params_master$n)) as.numeric(n) else as.numeric(params_master$n)), + "hyper-g-n" = hyper.g.n(alpha = if (is.null(params_master$a)) 3 else params_master$a, + n = params_master$n), + "BIC" = bic.prior(n = if (is.null(params_master$n)) n else params_master$n), + stop("Unrecognized prior_beta for GLM: ", params_master$prior_beta) + ) + result <- glm.logpost.bas2(y, x, model, complex, params_nested) + } + } else if (params_master$family == "gaussian") { + + if (params_master$prior_beta %in% gaussian_not_robust) { + warning(sprintf("Prior '%s' is not reccomended supported for Gaussian family '%s'. Can be unstable for strong signals (R^2 > 0.9). Reccomended priors under Gaussian family: %s", + params_master$prior_beta, params_master$family, + paste(gaussian_robust, collapse = ", "))) + } + + params_nested$r <- params_master$r + + if (params_master$prior_beta %in% gaussian_tcch) { + + params_nested$prior_beta <- switch( + params_master$prior_beta, + "beta.prime" = list("beta.prime"), + "CH" = list("CH",a = if (is.null(params_master$a)) stop("a must be provided") else params_master$a, + b = if (is.null(params_master$b)) stop("b must be provided") else params_master$b, + s = if (is.null(params_master$s)) stop("s must be provided") else params_master$s), + "tCCH" = list("tCCH", a = if (is.null(params_master$a)) stop("a must be provided") else params_master$a, + b = if (is.null(params_master$b)) stop("b must be provided") else params_master$b, + s = if (is.null(params_master$s)) stop("s must be provided") else params_master$s, + rho = if (is.null(params_master$rho)) stop("rho must be provided") else params_master$rho, + v = if (is.null(params_master$v)) stop("v must be provided") else params_master$v, + k = if (is.null(params_master$k)) stop("k must be provided") else params_master$k), + "intrinsic" = list("intrinsic"), + "TG" = list("TG",a = if (is.null(params_master$a)) stop("a must be provided") else params_master$a, + s = if (is.null(params_master$s)) stop("s must be provided") else params_master$s), + "Jeffreys" = list("Jeffreys"), + "ZS-adapted" = list("ZS-adapted"), + "benchmark" = list("benchmark"), + "robust" = list("robust"), + "uniform" = list("uniform"), + stop("Unrecognized prior_beta for Gaussian GLM: ", params_master$prior_beta) + ) + result <- gaussian_tcch_log_likelihood2(y, x, model, complex, params_nested) + + }else if (params_master$prior_beta == "Jeffreys-BIC") { + if (is.null(params_master$var)) params_nested$var <- "unknown" else params_nested$var <- params_master$var + result <- gaussian.loglik2(y, x, model, complex, params_nested) + } else if (params_master$prior_beta %in% gaussian_bas) { + + params_nested$prior_beta <- switch( + params_master$prior_beta, + "g-prior" = 0, + "hyper-g" = 1, + "EB-local" = 2, + "BIC" = 3, + "ZS-null" = 4, + "ZS-full" = 5, + "hyper-g-laplace" = 6, + "AIC" = 7, + "EB-global" = 2, + "hyper-g-n" = 8, + "JZS" = 9 + ) + if(params_master$prior_beta == "g-prior") + { + if (!is.null(params_master$g)) params_nested$g <- params_master$g else stop("g must be provided") + result <- gaussian.loglik2.g(y, x, model, complex, params_nested) + } + else{ + if (!is.null(params_master$a)) params_nested$alpha <- params_master$a else params_nested$alpha = -1 + result <- lm.logpost.bas2(y, x, model, complex, params_nested) + } + + } else { + stop("Unexpected error in prior_beta logic for Gaussian.") + } + } else { + stop("Unsupported family: ", params_master$family, ". Supported families are 'binomial', 'poisson', 'gamma', or 'gaussian'.") + } + + # Ensure consistent return structure + if (is.null(result$crit) || is.null(result$coefs)) { + stop("Error in computation: Returned result does not contain 'crit' and 'coefs'.") + } + + return(list(crit = result$crit, coefs = result$coefs)) +} \ No newline at end of file diff --git a/R/local_optim.R b/R/local_optim.R index 28696f0ce504f856fbdf3a57842ea18ce02583ef..a198329d5a95731eb3a85504d0092895b7e09df1 100644 --- a/R/local_optim.R +++ b/R/local_optim.R @@ -13,10 +13,9 @@ simulated.annealing <- function (model, data, loglik.pi, indices, complex, param temp <- params$t.init # Initial temperature # Calculate current likelihood - model.res <- loglik.pre(loglik.pi, model, complex, data, loglikparams,visited.models, sub) + model.res <- loglik.pre(loglik.pi, model, complex, data, loglikparams, visited.models, sub) model.lik <- model.res$crit - models[[length(models) + 1]] <- list(prob=NA, model=model, coefs=model.res$coefs, crit=model.lik, alpha=NA) - # print(paste("SA Start:", model.lik)) + models[[length(models) + 1]] <- list(prob = NA, model = model, coefs = model.res$coefs, crit = model.lik, alpha = NA) while (temp > params$t.min) { # Make M tries at current temperature for (m in 1:params$M) { @@ -25,7 +24,7 @@ simulated.annealing <- function (model, data, loglik.pi, indices, complex, param model.proposal <- loglik.pre(loglik.pi, proposal, complex, data, loglikparams, visited.models = visited.models, sub = sub) proposal.lik <- model.proposal$crit # Store the model that we have calculated - models[[length(models) + 1]] <- list(prob=NA, model=proposal, coefs=model.proposal$coefs, crit=proposal.lik, alpha=NA) + models[[length(models) + 1]] <- list(prob = NA, model = proposal, coefs = model.proposal$coefs, crit = proposal.lik, alpha = NA) # Calculate move probability for negative steps (Bolzmann distribution, see Blum and Roli p. 274) if (proposal.lik > model.lik) alpha <- 1 else alpha <- min(1, exp((proposal.lik - model.lik) / temp)) @@ -38,8 +37,7 @@ simulated.annealing <- function (model, data, loglik.pi, indices, complex, param # Update temperature temp <- temp * exp(-params$dt) } - # print(paste("SA Finish:", model.lik)) - return(list(model=model, kern=kernel, models=models)) + return(list(model = model, kern = kernel, models = models)) } greedy.optim <- function (model, data, loglik.pi, indices, complex, params, loglikparams, kernel = NULL, visited.models = NULL, sub = FALSE) { @@ -82,10 +80,10 @@ greedy.optim <- function (model, data, loglik.pi, indices, complex, params, logl local.optim <- function (model, data, loglik.pi, indices, complex, type, params, kernel = NULL, visited.models = NULL, sub = FALSE) { if (type == 1) { - return(simulated.annealing(model, data, loglik.pi, indices, complex, params$sa, params$loglik, kernel, visited.models = visited.models, sub = sub)) + return(simulated.annealing(model, data, loglik.pi, indices, complex, params$sa, params$mlpost, kernel, visited.models = visited.models, sub = sub)) } if (type == 2) { - return(greedy.optim(model, data, loglik.pi, indices, complex, params$greedy, params$loglik, kernel, visited.models = visited.models, sub = sub)) + return(greedy.optim(model, data, loglik.pi, indices, complex, params$greedy, params$mlpost, kernel, visited.models = visited.models, sub = sub)) } if (type == 3) { return("not implemented") diff --git a/R/mjmcmc.R b/R/mjmcmc.R index 8e6e57bb4cc6062c95df4ffb133a36b0d21f0e93..8dc38ac67d2131457fe7fc593c8153e2af20cbaa 100644 --- a/R/mjmcmc.R +++ b/R/mjmcmc.R @@ -5,9 +5,11 @@ #' Main algorithm for MJMCMC (Genetically Modified MJMCMC) #' -#' @param data A matrix containing the data to use in the algorithm, -#' first column should be the dependent variable, -#' and the rest of the columns should be the independent variables. +#' @param x matrix containing the design matrix with data to use in the algorithm, +#' @param y response variable +#' @param mlpost_params parameters for the estimator function loglik.pi +#' @param intercept whether intercept should be added to the design matrix (no model selection for intercept) +#' @param fixed how many of the first columns of the design matrix will always be included in the models #' @param loglik.pi The (log) density to explore #' @param N The number of iterations to run for #' @param probs A list of the various probability vectors to use @@ -26,31 +28,50 @@ #' \item{populations}{The covariates represented as a list of features.} #' #' @examples -#' result <- mjmcmc(matrix(rnorm(600), 100), gaussian.loglik) +#' result <- mjmcmc(y = matrix(rnorm(100), 100),x = matrix(rnorm(600), 100), gaussian.loglik) #' summary(result) #' plot(result) #' #' @export mjmcmc -mjmcmc <- function (data, loglik.pi = gaussian.loglik, N = 100, probs = NULL, params = NULL, sub = FALSE, verbose = TRUE) { +mjmcmc <- function ( + x, + y, + loglik.pi = fbms.mlik.master, + mlpost_params = list(family = "gaussian", beta_prior = list(type = "g-prior")), + intercept = TRUE, + fixed = 0, + N = 100, + probs = NULL, + params = NULL, + sub = FALSE, + verbose = TRUE +) { # Verify that data is well-formed - labels <- names(data)[-1] - data <- check.data(data, verbose) + if (intercept) { + x <- cbind(1, x) + fixed <- fixed + 1 + } + data <- check.data(x, y, fixed, verbose) + labels <- names(x) + if (fixed != 0) + labels <- labels[-seq_len(fixed)] # Generate default probabilities and parameters if there are none supplied. if (is.null(probs)) probs <- gen.probs.mjmcmc() - if (is.null(params)) params <- gen.params.mjmcmc(data) + if (is.null(params)) params <- gen.params.mjmcmc(ncol(data$x) - data$fixed) + if (!is.null(mlpost_params)) params$mlpost <- mlpost_params # Acceptance probability accept <- 0 # Create a population of just the covariates - S <- gen.covariates(ncol(data)-2) + S <- gen.covariates(data) complex <- complex.features(S) # Initialize first model model.cur <- as.logical(rbinom(n = length(S), size = 1, prob = 0.5)) - model.cur.res <- loglik.pre(loglik.pi, model.cur, complex, data, params$loglik, visited.models=NULL, sub = sub) - model.cur <- list(prob=0, model=model.cur, coefs=model.cur.res$coefs, crit=model.cur.res$crit, alpha=0) + model.cur.res <- loglik.pre(loglik.pi, model.cur, complex, data, params$mlpost, visited.models = NULL, sub = sub) + model.cur <- list(prob = 0, model = model.cur, coefs = model.cur.res$coefs, crit = model.cur.res$crit, alpha = 0) if (verbose) cat("\nMJMCMC begin.\n") result <- mjmcmc.loop(data, complex, loglik.pi, model.cur, N, probs, params, sub, verbose) @@ -60,7 +81,9 @@ mjmcmc <- function (data, loglik.pi = gaussian.loglik, N = 100, probs = NULL, pa result$populations <- S # Return formatted results + result$fixed <- data$fixed result$labels <- labels + result$intercept <- intercept class(result) <- "mjmcmc" return(result) } @@ -92,7 +115,7 @@ mjmcmc.loop <- function (data, complex, loglik.pi, model.cur, N, probs, params, # Acceptance count accept <- 0 # Number of covariates or features - covar_count <- ncol(data) - 2 + covar_count <- ncol(data$x) # A list of models that have been visited models <- vector("list", N) # Initialize a vector to contain local opt visited models @@ -213,7 +236,7 @@ mjmcmc.prop <- function (data, loglik.pi, model.cur, complex, pip_estimate, prob model.cur$prob <- prob.proposal(proposal$model, model.cur$model, q.g, params$mh, pip_estimate) } # Calculate log likelihoods for the proposed model - proposal.res <- loglik.pre(loglik.pi, proposal$model, complex, data, params$loglik, visited.models=visited.models, sub = sub) + proposal.res <- loglik.pre(loglik.pi, proposal$model, complex, data, params$mlpost, visited.models=visited.models, sub = sub) proposal$crit <- proposal.res$crit # Calculate acceptance probability for proposed model diff --git a/R/parallel.R b/R/parallel.R index 839d6b7b7458e8f1ba1f5142e3b612fa1de65940..d0eb1c1bdf4aa8fba461b062c5311aba936fd917 100644 --- a/R/parallel.R +++ b/R/parallel.R @@ -88,13 +88,20 @@ rmclapply <- function(runs, args, fun, mc.cores = NULL) { #' @return Merged results from multiple mjmcmc runs #' #' @examples -#' result <- mjmcmc.parallel(runs = 1, cores = 1, matrix(rnorm(600), 100), gaussian.loglik) +#' result <- mjmcmc.parallel(runs = 1, +#' cores = 1, +#' loglik.pi = FBMS::gaussian.loglik, +#' y = matrix(rnorm(100), 100), +#' x = matrix(rnorm(600), 100)) #' summary(result) #' plot(result) #' #' @export mjmcmc.parallel <- function(runs = 2, cores = getOption("mc.cores", 2L), ...) { - results <- rmclapply(seq_len(runs), args = list(...), mc.cores = cores, fun = mjmcmc) + results <- list() + results$chains <- rmclapply(seq_len(runs), args = list(...), mc.cores = cores, fun = mjmcmc) + results$fixed <- results$chains[[1]]$fixed + results$intercept <- results$chains[[1]]$intercept class(results) <- "mjmcmc_parallel" gc() return(results) @@ -113,12 +120,10 @@ mjmcmc.parallel <- function(runs = 2, cores = getOption("mc.cores", 2L), ...) { #' result <- gmjmcmc.parallel( #' runs = 1, #' cores = 1, -#' list(populations = "best", complex.measure = 2, tol = 0.0000001), -#' matrix(rnorm(600), 100), -#' P = 2, -#' gaussian.loglik, -#' loglik.alpha = gaussian.loglik.alpha, -#' c("p0", "exp_dbl") +#' loglik.pi = FBMS::gaussian.loglik, +#' y = matrix(rnorm(100), 100), +#' x = matrix(rnorm(600), 100), +#' transforms = c("p0", "exp_dbl") #' ) #' #' summary(result) @@ -126,11 +131,22 @@ mjmcmc.parallel <- function(runs = 2, cores = getOption("mc.cores", 2L), ...) { #' plot(result) #' #' @export -gmjmcmc.parallel <- function(runs = 2, cores = getOption("mc.cores", 2L), merge.options = list(populations = "best", complex.measure = 2, tol = 0.0000001), data, loglik.pi = gaussian.loglik, loglik.alpha = gaussian.loglik.alpha, transforms, ...) { +gmjmcmc.parallel <- function( + runs = 2, + cores = getOption("mc.cores", 2L), + merge.options = list(populations = "best", complex.measure = 2, tol = 0.0000001), + x, + y, + loglik.pi = gaussian.loglik, + loglik.alpha = gaussian.loglik.alpha, + mlpost_params = NULL, + transforms, + ... +) { options("gmjmcmc-transformations" = transforms) - results <- rmclapply(seq_len(runs), args = list(data = data, loglik.pi = loglik.pi, loglik.alpha = loglik.alpha, transforms = transforms, ...), mc.cores = cores, fun = gmjmcmc) + results <- rmclapply(seq_len(runs), args = list(x = x, y = y, loglik.pi = loglik.pi, loglik.alpha = loglik.alpha, mlpost_params = mlpost_params, transforms = transforms, ...), mc.cores = cores, fun = gmjmcmc) class(results) <- "gmjmcmc_parallel" - merged <- merge_results(results, merge.options$populations, merge.options$complex.measure, merge.options$tol, data = data) + merged <- merge_results(results, merge.options$populations, merge.options$complex.measure, merge.options$tol, data = list(x = x, y = y)) gc() return(merged) } diff --git a/R/predict.R b/R/predict.R index 3171f73fd1877edc049faf2bd49f02ec2d62d26b..b120c456c930407adee37064dfc20a5e8c592e41 100644 --- a/R/predict.R +++ b/R/predict.R @@ -35,16 +35,11 @@ #' #' @export predict.bgnlm_model <- function(object, x, link = function(x) { x }, ... ) { - x.precalc <- model.matrix( - as.formula(paste0("~I(", paste0(names(object$coefs)[-1][object$coefs[-1]!=0], collapse = ")+I("), ")")), - data = x - ) - #browser() - if(dim(x.precalc)[2]= (1 - tol)) # Compare equivalent features complexity to find most simple - equiv.complex <- list(width=complex$width[equiv.feats], oc=complex$oc[equiv.feats], depth=complex$depth[equiv.feats]) + equiv.complex <- list(width = complex$width[equiv.feats], oc = complex$oc[equiv.feats], depth = complex$depth[equiv.feats]) equiv.simplest <- lapply(equiv.complex, which.min) - feats.map[1:3,equiv.feats] <- c(equiv.feats[equiv.simplest$width], equiv.feats[equiv.simplest$oc], equiv.feats[equiv.simplest$depth]) - feats.map[4,equiv.feats] <- sum(renorms[equiv.feats]) + feats.map[1:3, equiv.feats] <- c(equiv.feats[equiv.simplest$width], equiv.feats[equiv.simplest$oc], equiv.feats[equiv.simplest$depth]) + feats.map[4, equiv.feats] <- sum(renorms[equiv.feats]) } # Select the simplest features based on the specified complexity measure and sort them feats.simplest.ids <- unique(feats.map[complex.measure, ]) @@ -165,7 +166,9 @@ merge_results <- function (results, populations = NULL, complex.measure = NULL, rep.pop = pw$pop.best, best.log.posteriors = bests, rep.thread = pw$thread.best, - transforms = results[[1]]$transforms + transforms = results[[1]]$transforms, + fixed = results[[1]]$fixed, + intercept = results[[1]]$intercept ) attr(merged, "class") <- "gmjmcmc_merged" return(merged) @@ -220,7 +223,9 @@ population.weigths <- function (results, pops.use) { #' @return A character representation of a model #' #' @examples -#' result <- gmjmcmc(matrix(rnorm(600), 100), P = 2, gaussian.loglik, NULL, c("p0", "exp_dbl")) +#' result <- gmjmcmc(y = matrix(rnorm(100), 100), +#' x = matrix(rnorm(600), 100), +#' P = 2, transforms = c("p0", "exp_dbl")) #' summary(result) #' plot(result) #' model.string(c(TRUE, FALSE, TRUE, FALSE, TRUE), result$populations[[1]]) @@ -228,7 +233,7 @@ population.weigths <- function (results, pops.use) { #' #' @export model.string model.string <- function (model, features, link = "I", round = 2) { - modelstring <- paste0(sapply(features[model], print.feature, alphas = TRUE, round = round), collapse="+") + modelstring <- paste0(sapply(features[model], print.feature, alphas = TRUE, round = round), collapse = "+") modelfun <- set_alphas(modelstring) modelfun$formula <- paste0(link, "(", modelfun$formula, ")") return(modelfun) @@ -291,16 +296,33 @@ get.mpm.model <- function(result, y, x, labels = F, family = "gaussian", loglik. if (family == "binomial") loglik.pi <- logistic.loglik - sm <- summary(result, labels = labels, verbose = FALSE) - mpm <- sm$feats.strings[sm$marg.probs > 0.5] - - x.precalc <- model.matrix( - as.formula(paste0("~I(", paste0(mpm, collapse = ")+I("), ")")), - data = x) - - model <- loglik.pi(y = y, x = x.precalc, model = rep(TRUE, length(mpm) + 1), complex = list(oc = 0), params = params) - class(model) <- "bgnlm_model" - model$crit <- "MPM" + if (is(result, "mjmcmc_parallel")) { + models <- unlist(lapply(result$chains, function (x) x$models), recursive = FALSE) + marg.probs <- marginal.probs.renorm(models)$probs + features <- result$chains[[1]]$populations + } else if (is(result, "gmjmcmc")) { + best_pop <- which.max(unlist(result$best.margs)) + marg.probs <- result$marg.probs[[best_pop]] + features <- result$populations[[best_pop]] + } else if (is(result, "gmjmcmc_merged") || is(result, "mjmcmc")) { + marg.probs <- result$marg.probs + features <- result$features + } + features <- features[marg.probs > 0.5] + + if (result$intercept) { + x <- cbind(1, x) + } + precalc <- precalc.features(list(x = x, y = y, fixed = result$fixed), features) + + coefs <- loglik.pi(y = y, x = precalc$x, model = rep(TRUE, length(features) + result$fixed), complex = list(oc = 0), params = params)$coefs + model <- structure(list( + coefs = coefs, + features = features, + fixed = result$fixed, + intercept = result$intercept + ), class = "bgnlm_model") + return(model) } @@ -334,7 +356,9 @@ get.mpm.model <- function(result, y, x, labels = F, family = "gaussian", loglik. #' } #' #' @examples -#' result <- gmjmcmc(matrix(rnorm(600), 100), P = 2, gaussian.loglik, NULL, c("p0", "exp_dbl")) +#' result <- gmjmcmc(x = matrix(rnorm(600), 100), +#' y = matrix(rnorm(100), 100), +#' P = 2, transforms = c("p0", "exp_dbl")) #' get.best.model(result) #' #' @export @@ -347,8 +371,8 @@ get.best.model <- function(result, labels = FALSE) { if (length(labels) == 1 && labels[1] == FALSE && length(result[[1]]$labels) > 0) { labels <- result[[1]]$labels } - best.chain <- which.max(sapply(result,function(x)x$best.crit)) - return(get.best.model.mjmcmc(result[[best.chain]], labels)) + best.chain <- which.max(sapply(result$chains, function (x) x$best.crit)) + return(get.best.model.mjmcmc(result$chains[[best.chain]], labels)) } if (is(result,"gmjmcmc")) { @@ -371,7 +395,11 @@ get.best.model.gmjmcmc <- function (result, labels) { best.pop.id <- which.max(sapply(result$best.margs,function(x)x)) best.mod.id <- which.max(sapply(result$models[[best.pop.id]],function(x)x$crit)) ret <- result$models[[best.pop.id]][[best.mod.id]] - names(ret$coefs) <- c("Intercept",sapply(result$populations[[best.pop.id]],print.feature,labels = labels)[which(ret$model)]) + ret$intercept <- result$intercept + ret$fixed <- result$fixed + coefnames <- sapply(result$populations[[best.pop.id]], print.feature, labels = labels)[ret$model] + if (result$intercept) coefnames <- c("Intercept", coefnames) + names(ret$coefs) <- coefnames class(ret) = "bgnlm_model" return(ret) } @@ -382,7 +410,9 @@ get.best.model.mjmcmc <- function (result, labels) { } best.mod.id <- which.max(sapply(result$models,function(x)x$crit)) ret <- result$models[[best.mod.id]] - names(ret$coefs) <- c("Intercept",sapply(result$populations,print.feature,labels = labels)[which(ret$model)]) + coefnames <- sapply(result$populations, print.feature, labels = labels)[ret$model] + if (result$intercept) coefnames <- c("Intercept", coefnames) + names(ret$coefs) <- coefnames class(ret) = "bgnlm_model" return(ret) } @@ -395,7 +425,10 @@ get.best.model.mjmcmc <- function (result, labels) { #' @return A matrix of character representations of the features of a model. #' #' @examples -#' result <- gmjmcmc(matrix(rnorm(600), 100), P = 2, gaussian.loglik, NULL, c("p0", "exp_dbl")) +#' result <- gmjmcmc(y = matrix(rnorm(100), 100), +#' x = matrix(rnorm(600), 100), +#' P = 2, +#' transforms = c("p0", "exp_dbl")) #' string.population(result$populations[[1]]) #' #' @export @@ -413,7 +446,10 @@ string.population <- function(x, round = 2) { #' @return A matrix of character representations of a list of models. #' #' @examples -#' result <- gmjmcmc(matrix(rnorm(600), 100), P = 2, gaussian.loglik, NULL, c("p0", "exp_dbl")) +#' result <- gmjmcmc(y = matrix(rnorm(100), 100), +#' x = matrix(rnorm(600), 100), +#' P = 2, +#' transforms = c("p0", "exp_dbl")) #' string.population.models(result$populations[[2]], result$models[[2]]) #' #' @export @@ -434,7 +470,10 @@ string.population.models <- function(features, models, round = 2, link = "I") { #' @return No return value, just creates a plot #' #' @examples -#' result <- gmjmcmc(matrix(rnorm(600), 100), P = 2, gaussian.loglik, NULL, c("p0", "exp_dbl")) +#' result <- gmjmcmc(y = matrix(rnorm(100), 100), +#' x = matrix(rnorm(600), 100), +#' P = 2, +#' transforms = c("p0", "exp_dbl")) #' plot(result) #' #' @@ -471,7 +510,7 @@ plot.gmjmcmc <- function (x, count = "all", pop = "best", tol = 0.0000001, data #' @return No return value, just creates a plot #' #' @examples -#' result <- mjmcmc(matrix(rnorm(600), 100), gaussian.loglik) +#' result <- mjmcmc(y = matrix(rnorm(100), 100),x = matrix(rnorm(600), 100), gaussian.loglik) #' plot(result) #' #' @export @@ -509,7 +548,11 @@ marg.prob.plot <- function (feats.strings, marg.probs, count = "all", ...) { #' @return No return value, just creates a plot #' #' @examples -#' result <- mjmcmc.parallel(runs = 1, cores = 1, matrix(rnorm(600), 100), gaussian.loglik) +#' result <- mjmcmc.parallel(runs = 1, +#' cores = 1, +#' y = matrix(rnorm(100), 100), +#' x = matrix(rnorm(600), 100), +#' gaussian.loglik) #' plot(result) #' #' @export @@ -520,13 +563,13 @@ plot.mjmcmc_parallel <- function (x, count = "all", ...) { merge_mjmcmc_parallel <- function (x) { run.weights <- run.weigths(x) - marg.probs <- x[[1]]$marg.probs * run.weights[1] - for (i in seq_along(x[-1])) { - marg.probs <- marg.probs + x[[i]]$marg.probs * run.weights[i] + marg.probs <- x$chains[[1]]$marg.probs * run.weights[1] + for (i in seq_along(x[-c(1, (-1:0 + length(x)))])) { + marg.probs <- marg.probs + x$chains[[i]]$marg.probs * run.weights[i] } return(structure( list( - features = sapply(x[[1]]$populations, print), + features = sapply(x$chains[[1]]$populations, print), marg.probs = marg.probs, results = x ), @@ -536,7 +579,7 @@ merge_mjmcmc_parallel <- function (x) { run.weigths <- function (results) { - best.crits <- sapply(results, function (x) x$best.crit) + best.crits <- sapply(results$chains, function (x) x$best.crit) max.crit <- max(best.crits) return(exp(best.crits - max.crit) / sum(exp(best.crits - max.crit))) } @@ -549,12 +592,10 @@ run.weigths <- function (results) { #' result <- gmjmcmc.parallel( #' runs = 1, #' cores = 1, -#' list(populations = "best", complex.measure = 2, tol = 0.0000001), -#' matrix(rnorm(600), 100), +#' y = matrix(rnorm(100), 100), +#' x = matrix(rnorm(600), 100), #' P = 2, -#' gaussian.loglik, -#' loglik.alpha = gaussian.loglik.alpha, -#' c("p0", "exp_dbl") +#' transforms = c("p0", "exp_dbl") #' ) #' plot(result) #' @@ -586,8 +627,12 @@ plot.gmjmcmc_merged <- function (x, count = "all", pop = NULL,tol = 0.0000001, #' @examples #' #' data <- data.frame(matrix(rnorm(600), 100)) -#' result <- mjmcmc.parallel(runs = 2, cores = 1, data, gaussian.loglik) -#' compute_effects(result,labels = names(data)[-1]) +#' result <- mjmcmc.parallel(runs = 2, +#' cores = 1, +#' y = matrix(rnorm(100), 100), +#' x = data, +#' gaussian.loglik) +#' compute_effects(result,labels = names(data)) #' #' @seealso \code{\link{predict}} #' @export diff --git a/R/summary.R b/R/summary.R index 10e731f48893ac5e5ef39e3ccb9ae3b5437223b0..2d4c6e50cc7884f0536feeb9d6edb26a3125493a 100644 --- a/R/summary.R +++ b/R/summary.R @@ -14,14 +14,18 @@ #' \item{marg.probs}{Marginal probabilities corresponding to the ordered feature strings.} #' #' @examples -#' result <- gmjmcmc(matrix(rnorm(600), 100), P = 2, gaussian.loglik, NULL, c("p0", "exp_dbl")) +#' result <- gmjmcmc(y = matrix(rnorm(100), 100), +#' x = matrix(rnorm(600), 100), +#' P = 2, +#' transforms = c("p0", "exp_dbl")) #' summary(result, pop = "best") #' #' @export summary.gmjmcmc <- function (object, pop = "best", tol = 0.0001, labels = FALSE, effects = NULL, data = NULL, verbose = TRUE, ...) { transforms.bak <- set.transforms(object$transforms) - if (length(labels) == 1 && labels[1] == FALSE && length(object$labels) > 0) + if (length(labels) == 1 && labels[1] == FALSE && length(object$labels) > 0) { labels = object$labels + } if (pop == "all") { results <- list() results[[1]] <- object @@ -93,11 +97,10 @@ summary.gmjmcmc <- function (object, pop = "best", tol = 0.0001, labels = FALSE, #' runs = 1, #' cores = 1, #' list(populations = "best", complex.measure = 2, tol = 0.0000001), -#' matrix(rnorm(600), 100), +#' y = matrix(rnorm(100), 100), +#' x = matrix(rnorm(600), 100), #' P = 2, -#' gaussian.loglik, -#' loglik.alpha = gaussian.loglik.alpha, -#' c("p0", "exp_dbl") +#' transforms = c("p0", "exp_dbl") #' ) #' summary(result) #' @@ -114,14 +117,14 @@ summary.gmjmcmc_merged <- function (object, tol = 0.0001, labels = FALSE, effect best <- max(sapply(object$results, function (y) y$best)) feats.strings <- sapply(object$features, FUN = function(x) print.feature(x = x, labels = labels, round = 2)) - if (!is.null(effects) & !is.null(labels)) { effects <- compute_effects(object,labels = labels, quantiles = effects) } obj <- summary_internal( best = object$crit.best, - feats.strings, object$marg.probs, + feats.strings = feats.strings, + marg.probs = object$marg.probs, effects = effects, best.pop = object$pop.best, thread.best = object$thread.best, @@ -149,14 +152,20 @@ summary.gmjmcmc_merged <- function (object, tol = 0.0001, labels = FALSE, effect #' \item{marg.probs}{Marginal probabilities corresponding to the ordered feature strings.} #' #' @examples -#' result <- mjmcmc(matrix(rnorm(600), 100), gaussian.loglik) +#' result <- mjmcmc(y = matrix(rnorm(100), 100), +#' x = matrix(rnorm(600), 100), +#' gaussian.loglik) #' summary(result) #' #' @export summary.mjmcmc <- function (object, tol = 0.0001, labels = FALSE, effects = NULL, verbose = TRUE, ...) { - if (length(labels) == 1 && labels[1] == FALSE && length(object$labels) > 0) - labels = object$labels - return(summary.mjmcmc_parallel(list(object), tol = tol, labels = labels, effects = effects, verbose = verbose)) + return(summary.mjmcmc_parallel( + list(chains = list(object), fixed = object$fixed, intercept = object$intercept), + tol = tol, + labels = labels, + effects = effects, + verbose = verbose + )) } #' Function to print a quick summary of the results @@ -173,25 +182,26 @@ summary.mjmcmc <- function (object, tol = 0.0001, labels = FALSE, effects = NULL #' \item{marg.probs}{Marginal probabilities corresponding to the ordered feature strings.} #' #' @examples -#' result <- mjmcmc.parallel(runs = 1, cores = 1, matrix(rnorm(600), 100), gaussian.loglik) +#' result <- mjmcmc.parallel(runs = 1, +#' cores = 1, +#' y = matrix(rnorm(100), 100), +#' x = matrix(rnorm(600), 100), +#' gaussian.loglik) #' summary(result) #' #' @export summary.mjmcmc_parallel <- function (object, tol = 0.0001, labels = FALSE, effects = NULL, verbose = TRUE, ...) { # Get features as strings for printing - if (length(labels) == 1 && labels[1] == FALSE && length(object[[1]]$labels) > 0) { - labels = object[[1]]$labels + if (length(labels) == 1 && labels[1] == FALSE && length(object$chains[[1]]$labels) > 0) { + labels = object$chains[[1]]$labels } - feats.strings <- sapply(object[[1]]$populations, FUN = function(x) print.feature(x = x, labels = labels, round = 2)) + feats.strings <- sapply(object$chains[[1]]$populations, FUN = function(x) print.feature(x = x, labels = labels, round = 2)) # Get marginal posterior of features - models <- unlist(lapply(object, function (x) x$models), recursive = FALSE) + models <- unlist(lapply(object$chains, function (x) x$models), recursive = FALSE) marg.probs <- marginal.probs.renorm(models)$probs - best <- max(sapply(object, function (x) x$best)) + best <- max(sapply(object$chains, function (x) x$best)) if (!is.null(effects) & !is.null(labels)) { - if (is.list(object)) - effects <- compute_effects(object[[1]],labels = labels, quantiles = effects) - else - effects <- compute_effects(object,labels = labels, quantiles = effects) + effects <- compute_effects(object$chains[[1]], labels = labels, quantiles = effects) } return(summary_internal(best, feats.strings, marg.probs, effects, tol = tol, verbose = verbose)) } diff --git a/README.md b/README.md index 08b826dee9d643833f3755dcc5cc2a5c401ffcc2..a32b6ff74fdc6ea917ae6aff32637d1eef066c4b 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ [![](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html#experimental) -[![](https://img.shields.io/github/last-commit/jonlachmann/GMJMCMC.svg)](https://github.com/jonlachmann/GMJMCMC/commits/master) +[![](https://img.shields.io/github/last-commit/jonlachmann/GMJMCMC.svg)](https://github.com/jonlachmann/GMJMCMC/commits/data-inputs) [![](https://img.shields.io/github/languages/code-size/jonlachmann/GMJMCMC.svg)](https://github.com/jonlachmann/GMJMCMC) [![R build status](https://github.com/jonlachmann/GMJMCMC/workflows/R-CMD-check/badge.svg)](https://github.com/jonlachmann/GMJMCMC/actions) -[![codecov](https://codecov.io/gh/jonlachmann/GMJMCMC/branch/FBMS/graph/badge.svg)](https://codecov.io/gh/jonlachmann/GMJMCMC) +[![codecov](https://codecov.io/gh/jonlachmann/GMJMCMC/branch/data-inputs/graph/badge.svg)](https://codecov.io/gh/jonlachmann/GMJMCMC) [![License: GPL](https://img.shields.io/badge/license-GPL-blue.svg)](https://cran.r-project.org/web/licenses/GPL) # FBMS - Flexible Bayesian Model Selection diff --git a/man/compute_effects.Rd b/man/compute_effects.Rd index a0105914b43a1c83a6bba586d3f31fd7540108e8..e6c2e9671541cb36e260a7f7ba7d39934f7daadc 100644 --- a/man/compute_effects.Rd +++ b/man/compute_effects.Rd @@ -24,8 +24,12 @@ Users can provide custom labels and specify quantiles for the computation of eff \examples{ data <- data.frame(matrix(rnorm(600), 100)) -result <- mjmcmc.parallel(runs = 2, cores = 1, data, gaussian.loglik) -compute_effects(result,labels = names(data)[-1]) +result <- mjmcmc.parallel(runs = 2, +cores = 1, +y = matrix(rnorm(100), 100), +x = data, +gaussian.loglik) +compute_effects(result,labels = names(data)) } \seealso{ diff --git a/man/diagn_plot.Rd b/man/diagn_plot.Rd index 058797b7c2b6d4c91606b7ed44aac5573438870c..82fb235258fcd848dc33513d744c8a82d694612a 100644 --- a/man/diagn_plot.Rd +++ b/man/diagn_plot.Rd @@ -26,7 +26,10 @@ A list of summary statistics for checking convergence with given confidence inte Plot convergence of best/median/mean/other summary log posteriors in time } \examples{ -result <- gmjmcmc(matrix(rnorm(600), 100), P = 2, gaussian.loglik, NULL, c("p0", "exp_dbl")) +result <- gmjmcmc( y = matrix(rnorm(100), 100), +x = matrix(rnorm(600), 100), +P = 2, +transforms = c("p0", "exp_dbl")) diagnstats <- diagn_plot(result) } diff --git a/man/fbms.Rd b/man/fbms.Rd index 55e251c17b3c2dab793fada583210bda413301f2..210b1dfe881adaec11c3dc746792165f9ffec773 100644 --- a/man/fbms.Rd +++ b/man/fbms.Rd @@ -8,9 +8,11 @@ Or Fit a BGLM model using Modified Mode Jumping Markov Chain Monte Carlo (MCMC) fbms( formula = NULL, family = "gaussian", + beta_prior = list(type = "g-prior"), + model_prior = NULL, data = NULL, impute = FALSE, - loglik.pi = gaussian.loglik, + loglik.pi = NULL, method = "mjmcmc", verbose = TRUE, ... @@ -19,7 +21,41 @@ fbms( \arguments{ \item{formula}{A formula object specifying the model structure. Default is NULL.} -\item{family}{The distribution family of the response variable. Currently supports "gaussian", "binomial" and "custom". Default is "gaussian".} +\item{family}{The distribution family of the response variable. Currently supports "gaussian", "binomial", "poisson", "gamma", and "custom". Default is "gaussian".} + +\item{beta_prior}{Type of prior as a string (default: "g-prior" with a = max(n, p^2)). Possible values include: +- "beta.prime": Beta-prime prior (GLM/Gaussian, no additional args) +- "CH": Compound Hypergeometric prior (GLM/Gaussian, requires \code{a}, \code{b}, optionally \code{s}) +- "EB-local": Empirical Bayes local prior (GLM/Gaussian, requires \code{a} for Gaussian) +- "EB-global": Empirical Bayes local prior (Gaussian, requires \code{a} for Gaussian) +- "g-prior": Zellner's g-prior (GLM/Gaussian, requires \code{g}) +- "hyper-g": Hyper-g prior (GLM/Gaussian, requires \code{a}) +- "hyper-g-n": Hyper-g/n prior (GLM/Gaussian, requires \code{a}) +- "tCCH": Truncated Compound Hypergeometric prior (GLM/Gaussian, requires \code{a}, \code{b}, \code{s}, \code{rho}, \code{v}, \code{k}) +- "intrinsic": Intrinsic prior (GLM/Gaussian, no additional args) +- "TG": Truncated Gamma prior (GLM/Gamma, requires \code{a}, \code{s}) +- "Jeffreys": Jeffreys prior (GLM/Gaussian, no additional args) +- "uniform": Uniform prior (GLM/Gaussian, no additional args) +- "benchmark": Benchmark prior (Gaussian/GLM, no additional args) +- "ZS-adapted": Zellner-Siow adapted prior (Gaussian TCCH, no additional args) +- "robust": Robust prior (Gaussian/GLM, no additional args) +- "Jeffreys-BIC": Jeffreys prior with BIC approximation of marginal likelihood (Gaussian/GLM) +- "ZS-null": Zellner-Siow null prior (Gaussian, requires \code{a}) +- "ZS-full": Zellner-Siow full prior (Gaussian, requires \code{a}) +- "hyper-g-laplace": Hyper-g Laplace prior (Gaussian, requires \code{a}) +- "AIC": AIC prior from BAS (Gaussian, requires penalty \code{a}) +- "BIC": BIC prior from BAS (Gaussian/GLM) +- "JZS": Jeffreys-Zellner-Siow prior (Gaussian, requires \code{a}) +\itemize{ +\item r: Model complexity penalty (default: 1/n) +\item g: Tuning parameter for g-prior (default: max(n, p^2)) +\item a, b, s, v, rho, k: Hyperparameters for various priors +\item n: Sample size for some priors (default: length(y)) +\item var: Variance assumption for Gaussian models ("known" or "unknown", default: "unknown") +\item laplace: Logical for Laplace approximation in GLM only (default: FALSE) +}} + +\item{model_prior}{a list with parameters of model priors, by default r should be provided} \item{data}{A data frame containing the variables in the model. If NULL, the variables are taken from the environment of the formula. Default is NULL.} @@ -53,7 +89,6 @@ fbms_result <- fbms( cores = 1 ) summary(fbms_result) -plot(fbms_result) } diff --git a/man/fbms.mlik.master.Rd b/man/fbms.mlik.master.Rd index b36851f1a706658a4cf51ab16c0bb6dc988dfb57..a6556ea141ef3ad680272baa99aa9ea6f22f79de 100644 --- a/man/fbms.mlik.master.Rd +++ b/man/fbms.mlik.master.Rd @@ -9,7 +9,7 @@ fbms.mlik.master( x, model, complex, - params = list(family = "gaussian", prior_beta = "g-prior", r = exp(-0.5)) + params = list(family = "gaussian", beta_prior = list(type = "g-prior"), r = exp(-0.5)) ) } \arguments{ @@ -24,47 +24,38 @@ fbms.mlik.master( \item{params}{A list of parameters controlling the model family, prior, and tuning parameters. Key elements include: \itemize{ -\item family: "binomial", "poisson", "gamma", or "gaussian" (default: "gaussian") +\item family: "binomial", "poisson", "gamma" (all three referred to as GLM below), or "gaussian" (default: "gaussian") \item prior_beta: Type of prior as a string (default: "g-prior"). Possible values include: \itemize{ -\item "beta.prime": Beta-prime prior (GLM, requires \code{n}) -\item "bic.prior": BIC-based prior (GLM, requires \code{n}) -\item "CCH": Chen-Clyde-Hsu prior (GLM, requires \code{a}, \code{b}, optionally \code{s}) -\item "EB.local": Empirical Bayes local prior (GLM/Gaussian BAS, requires \code{alpha} for Gaussian) +\item "beta.prime": Beta-prime prior (GLM/Gaussian, no additional args) +\item "CH": Compound Hypergeometric prior (GLM/Gaussian, requires \code{a}, \code{b}, optionally \code{s}) +\item "EB-local": Empirical Bayes local prior (GLM/Gaussian, requires \code{a} for Gaussian) +\item "EB-global": Empirical Bayes local prior (Gaussian, requires \code{a} for Gaussian) \item "g-prior": Zellner's g-prior (GLM/Gaussian, requires \code{g}) -\item "hyper.g": Hyper-g prior (GLM, requires \code{a}) -\item "hyper.g.n": Hyper-g/n prior (GLM, requires \code{a}, \code{n}) -\item "tCCH": Truncated Chen-Clyde-Hsu prior (GLM, requires \code{a}, \code{b}, optionally \code{s}, \code{rho}, \code{v}, \code{k}) -\item "intrinsic": Intrinsic prior (GLM, requires \code{n}) -\item "testBF.prior": Test Bayes factor prior (GLM, requires \code{g}) -\item "TG": Truncated Gamma prior (GLM, requires \code{a}) +\item "hyper-g": Hyper-g prior (GLM/Gaussian, requires \code{a}) +\item "hyper-g-n": Hyper-g/n prior (GLM/Gaussian, requires \code{a}) +\item "tCCH": Truncated Compound Hypergeometric prior (GLM/Gaussian, requires \code{a}, \code{b}, \code{s}, \code{rho}, \code{v}, \code{k}) +\item "intrinsic": Intrinsic prior (GLM/Gaussian, no additional args) +\item "TG": Truncated Gamma prior (GLM/Gamma, requires \code{a}, \code{s}) \item "Jeffreys": Jeffreys prior (GLM/Gaussian, no additional args) -\item "uniform": Uniform prior (GLM, no additional args) -\item "CH": Custom Chen-Hsu prior (Gaussian TCCH, requires \code{a}, \code{b}, optionally \code{s}) -\item "Hyper-g": Hyper-g prior (Gaussian TCCH, no additional args) -\item "Uniform": Uniform prior (Gaussian TCCH, no additional args) -\item "Beta-prime": Beta-prime prior (Gaussian TCCH, no additional args) -\item "Benchmark": Benchmark prior (Gaussian TCCH, no additional args) -\item "TruncGamma": Truncated Gamma prior (Gaussian TCCH, requires \code{at}, \code{st}) -\item "ZS adapted": Zellner-Siow adapted prior (Gaussian TCCH, no additional args) -\item "Robust": Robust prior (Gaussian TCCH, no additional args) -\item "Hyper-g/n": Hyper-g/n prior (Gaussian TCCH, no additional args) -\item "Intrinsic": Intrinsic prior (Gaussian TCCH, no additional args) -\item "hyper-g": Hyper-g prior (Gaussian BAS, requires \code{alpha}) -\item "BIC": BIC prior (Gaussian BAS, requires \code{alpha}) -\item "ZS-null": Zellner-Siow null prior (Gaussian BAS, requires \code{alpha}) -\item "ZS-full": Zellner-Siow full prior (Gaussian BAS, requires \code{alpha}) -\item "hyper-g-laplace": Hyper-g Laplace prior (Gaussian BAS, requires \code{alpha}) -\item "AIC": AIC prior (Gaussian BAS, requires \code{alpha}) -\item "JZS": Jeffreys-Zellner-Siow prior (Gaussian BAS, requires \code{alpha}) +\item "uniform": Uniform prior (GLM/Gaussian, no additional args) +\item "benchmark": Benchmark prior (Gaussian/GLM, no additional args) +\item "ZS-adapted": Zellner-Siow adapted prior (Gaussian TCCH, no additional args) +\item "robust": Robust prior (Gaussian/GLM, no additional args) +\item "Jeffreys-BIC": Jeffreys prior with BIC approximation of marginal likelihood (Gaussian/GLM) +\item "ZS-null": Zellner-Siow null prior (Gaussian, requires \code{a}) +\item "ZS-full": Zellner-Siow full prior (Gaussian, requires \code{a}) +\item "hyper-g-laplace": Hyper-g Laplace prior (Gaussian, requires \code{a}) +\item "AIC": AIC prior from BAS (Gaussian, requires penalty \code{a}) +\item "BIC": BIC prior from BAS (Gaussian/GLM) +\item "JZS": Jeffreys-Zellner-Siow prior (Gaussian, requires \code{a}) } -\item r: Model complexity penalty (default: 1/length(y)) -\item g: Tuning parameter for g-prior (default: max(n, p^2)) +\item r: Model complexity penalty (default: 1/n) +\item a: Tuning parameter for g-prior (default: max(n, p^2)) \item a, b, s, v, rho, k: Hyperparameters for various priors -\item at, st: Additional parameters for TruncGamma prior \item n: Sample size for some priors (default: length(y)) \item var: Variance assumption for Gaussian models ("known" or "unknown", default: "unknown") -\item laplace: Logical for Laplace approximation in GLM (default: FALSE) +\item laplace: Logical for Laplace approximation in GLM only (default: FALSE) }} } \value{ @@ -77,6 +68,11 @@ This function serves as a unified interface to compute the log marginal likeliho for different regression models and priors by calling specific log likelihood functions. } \examples{ -fbms.mlik.master(rnorm(100), matrix(rnorm(100)), TRUE, list(oc = 1), list(family = "gaussian", prior_beta = "g-prior")) +fbms.mlik.master(y = rnorm(100), +x = matrix(rnorm(100)), +c(TRUE,TRUE), +list(oc = 1), +params = list(family = "gaussian", beta_prior = list(type = "g-prior", a = 2), + r = exp(-0.5))) } diff --git a/man/fbms.mlik.master_old.Rd b/man/fbms.mlik.master_old.Rd new file mode 100644 index 0000000000000000000000000000000000000000..f317e7f9cdfb190356b4603ecd676268562438e9 --- /dev/null +++ b/man/fbms.mlik.master_old.Rd @@ -0,0 +1,73 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/likelihoods2.R +\name{fbms.mlik.master_old} +\alias{fbms.mlik.master_old} +\title{Master Log Marginal Likelihood Function} +\usage{ +fbms.mlik.master_old( + y, + x, + model, + complex, + params = list(family = "gaussian", prior_beta = "g-prior", r = exp(-0.5)) +) +} +\arguments{ +\item{y}{A numeric vector containing the dependent variable.} + +\item{x}{A matrix containing the precalculated features (independent variables).} + +\item{model}{A logical vector indicating which variables to include in the model.} + +\item{complex}{A list of complexity measures for the features.} + +\item{params}{A list of parameters controlling the model family, prior, and tuning parameters. +Key elements include: +\itemize{ +\item family: "binomial", "poisson", "gamma" (all three referred to as GLM below), or "gaussian" (default: "gaussian") +\item prior_beta: Type of prior as a string (default: "g-prior"). Possible values include: +\itemize{ +\item "beta.prime": Beta-prime prior (GLM/Gaussian, no additional args) +\item "CH": Compound Hypergeometric prior (GLM/Gaussian, requires \code{a}, \code{b}, optionally \code{s}) +\item "EB-local": Empirical Bayes local prior (GLM/Gaussian, requires \code{a} for Gaussian) +\item "EB-global": Empirical Bayes local prior (Gaussian, requires \code{a} for Gaussian) +\item "g-prior": Zellner's g-prior (GLM/Gaussian, requires \code{g}) +\item "hyper-g": Hyper-g prior (GLM/Gaussian, requires \code{a}) +\item "hyper-g-n": Hyper-g/n prior (GLM/Gaussian, requires \code{a}) +\item "tCCH": Truncated Compound Hypergeometric prior (GLM/Gaussian, requires \code{a}, \code{b}, \code{s}, \code{rho}, \code{v}, \code{k}) +\item "intrinsic": Intrinsic prior (GLM/Gaussian, no additional args) +\item "TG": Truncated Gamma prior (GLM/Gamma, requires \code{a}, \code{s}) +\item "Jeffreys": Jeffreys prior (GLM/Gaussian, no additional args) +\item "uniform": Uniform prior (GLM/Gaussian, no additional args) +\item "benchmark": Benchmark prior (Gaussian/GLM, no additional args) +\item "ZS-adapted": Zellner-Siow adapted prior (Gaussian TCCH, no additional args) +\item "robust": Robust prior (Gaussian/GLM, no additional args) +\item "Jeffreys-BIC": Jeffreys prior with BIC approximation of marginal likelihood (Gaussian/GLM) +\item "ZS-null": Zellner-Siow null prior (Gaussian, requires \code{a}) +\item "ZS-full": Zellner-Siow full prior (Gaussian, requires \code{a}) +\item "hyper-g-laplace": Hyper-g Laplace prior (Gaussian, requires \code{a}) +\item "AIC": AIC prior from BAS (Gaussian, requires penalty \code{a}) +\item "BIC": BIC prior from BAS (Gaussian/GLM) +\item "JZS": Jeffreys-Zellner-Siow prior (Gaussian, requires \code{a}) +} +\item r: Model complexity penalty (default: 1/n) +\item g: Tuning parameter for g-prior (default: max(n, p^2)) +\item a, b, s, v, rho, k: Hyperparameters for various priors +\item n: Sample size for some priors (default: length(y)) +\item var: Variance assumption for Gaussian models ("known" or "unknown", default: "unknown") +\item laplace: Logical for Laplace approximation in GLM only (default: FALSE) +}} +} +\value{ +A list with elements: +\item{crit}{Log marginal likelihood combined with the log prior.} +\item{coefs}{Posterior mode of the coefficients.} +} +\description{ +This function serves as a unified interface to compute the log marginal likelihood +for different regression models and priors by calling specific log likelihood functions. +} +\examples{ +fbms.mlik.master(rnorm(100), matrix(rnorm(100)), c(TRUE,TRUE), list(oc = 1)) + +} diff --git a/man/gaussian.loglik.Rd b/man/gaussian.loglik.Rd index b0bbae6fcaee9fd983aed15673c470fcbf0656e3..5b168e0798813c2237269b5511a8c2ea69e1a2d3 100644 --- a/man/gaussian.loglik.Rd +++ b/man/gaussian.loglik.Rd @@ -25,6 +25,6 @@ Log likelihood function for gaussian regression with a Jeffreys prior and BIC ap } \examples{ gaussian.loglik(rnorm(100), matrix(rnorm(100)), TRUE, list(oc = 1), NULL) - + } diff --git a/man/gaussian.loglik2.Rd b/man/gaussian.loglik2.Rd new file mode 100644 index 0000000000000000000000000000000000000000..4f19fc777e9f9e339f908fed59cf34823ed26e89 --- /dev/null +++ b/man/gaussian.loglik2.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/likelihoods2.R +\name{gaussian.loglik2} +\alias{gaussian.loglik2} +\title{Log likelihood function for gaussian regression with a Jeffreys prior and BIC approximation of MLIK with both known and unknown variance of the responses} +\usage{ +gaussian.loglik2(y, x, model, complex, params) +} +\arguments{ +\item{y}{A vector containing the dependent variable} + +\item{x}{The matrix containing the precalculated features} + +\item{model}{The model to estimate as a logical vector} + +\item{complex}{A list of complexity measures for the features} + +\item{params}{A list of parameters for the log likelihood, supplied by the user} +} +\value{ +A list with the log marginal likelihood combined with the log prior (crit) and the posterior mode of the coefficients (coefs). +} +\description{ +Log likelihood function for gaussian regression with a Jeffreys prior and BIC approximation of MLIK with both known and unknown variance of the responses +} +\examples{ +gaussian.loglik(rnorm(100), matrix(rnorm(100)), TRUE, list(oc = 1), NULL) + + +} diff --git a/man/gaussian.loglik2.alpha.Rd b/man/gaussian.loglik2.alpha.Rd new file mode 100644 index 0000000000000000000000000000000000000000..cb9874ec79a5e6e2140056b22eebed82e1144dfd --- /dev/null +++ b/man/gaussian.loglik2.alpha.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/likelihoods2.R +\name{gaussian.loglik2.alpha} +\alias{gaussian.loglik2.alpha} +\title{Log likelihood function for gaussian regression for alpha calculation +This function is just the bare likelihood function +Note that it only gives a proportional value and is equivalent to least squares} +\usage{ +gaussian.loglik2.alpha(a, data, mu_func) +} +\arguments{ +\item{a}{A vector of the alphas to be used} + +\item{data}{The data to be used for calculation} + +\item{mu_func}{The function linking the mean to the covariates, +as a string with the alphas as a[i].} +} +\value{ +A numeric with the log likelihood. +} +\description{ +Log likelihood function for gaussian regression for alpha calculation +This function is just the bare likelihood function +Note that it only gives a proportional value and is equivalent to least squares +} +\examples{ +\dontrun{ +gaussian.loglik2.alpha(my_alpha,my_data,my_mu) +} +} diff --git a/man/gaussian.loglik2.g.Rd b/man/gaussian.loglik2.g.Rd new file mode 100644 index 0000000000000000000000000000000000000000..85e32eadc3bffcb388bf226f41b9f70f5fb4d006 --- /dev/null +++ b/man/gaussian.loglik2.g.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/likelihoods2.R +\name{gaussian.loglik2.g} +\alias{gaussian.loglik2.g} +\title{Log likelihood function for linear regression using Zellners g-prior} +\usage{ +gaussian.loglik2.g(y, x, model, complex, params = NULL) +} +\arguments{ +\item{y}{A vector containing the dependent variable} + +\item{x}{The matrix containing the precalculated features} + +\item{model}{The model to estimate as a logical vector} + +\item{complex}{A list of complexity measures for the features} + +\item{params}{A list of parameters for the log likelihood, supplied by the user} +} +\value{ +A list with the log marginal likelihood combined with the log prior (crit) and the posterior mode of the coefficients (coefs). +} +\description{ +Log likelihood function for linear regression using Zellners g-prior +} +\examples{ +gaussian.loglik2.g(rnorm(100), matrix(rnorm(100)), TRUE, list(oc=1)) + +} diff --git a/man/gaussian_tcch_log_likelihood.Rd b/man/gaussian_tcch_log_likelihood.Rd index bba88fb0e235dc5249e969d99f4d260d920eb45d..54c951628149ef0de5359cedefa1f924802247dc 100644 --- a/man/gaussian_tcch_log_likelihood.Rd +++ b/man/gaussian_tcch_log_likelihood.Rd @@ -9,7 +9,7 @@ gaussian_tcch_log_likelihood( x, model, complex, - params = list(r = exp(-0.5), prior_beta = "Intrinsic") + params = list(r = exp(-0.5), beta_prior = list(type = "intrinsic")) ) } \arguments{ @@ -32,6 +32,6 @@ A list with elements: This function computes the marginal likelihood of a Gaussian regression model under different priors. } \examples{ -gaussian_tcch_log_likelihood(rnorm(100), matrix(rnorm(100)), TRUE, list(oc=1)) +gaussian_tcch_log_likelihood(rnorm(100), matrix(rnorm(100)), c(TRUE), list(oc=1)) } diff --git a/man/gaussian_tcch_log_likelihood2.Rd b/man/gaussian_tcch_log_likelihood2.Rd new file mode 100644 index 0000000000000000000000000000000000000000..3c5ad15abc506e7c8c7e21b7d7e9c143890d136d --- /dev/null +++ b/man/gaussian_tcch_log_likelihood2.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/likelihoods2.R +\name{gaussian_tcch_log_likelihood2} +\alias{gaussian_tcch_log_likelihood2} +\title{Log likelihood function for Gaussian regression with parameter priors from BAS package} +\usage{ +gaussian_tcch_log_likelihood2( + y, + x, + model, + complex, + params = list(r = exp(-0.5), prior_beta = "intrinsic") +) +} +\arguments{ +\item{y}{A numeric vector containing the dependent variable.} + +\item{x}{A matrix containing the independent variables, including an intercept column.} + +\item{model}{A logical vector indicating which variables to include in the model.} + +\item{complex}{A list containing complexity measures for the features.} + +\item{params}{A list of parameters for the log likelihood, specifying the tuning parameters of beta priors.} +} +\value{ +A list with elements: +\item{crit}{Log marginal likelihood combined with the log prior.} +\item{coefs}{Posterior mode of the coefficients.} +} +\description{ +This function computes the marginal likelihood of a Gaussian regression model under different priors. +} +\examples{ +gaussian_tcch_log_likelihood2(rnorm(100), matrix(rnorm(100)), TRUE, list(oc=1)) + +} diff --git a/man/gen.params.gmjmcmc.Rd b/man/gen.params.gmjmcmc.Rd index 945cd644c4306c95c48e85a85b10da099cfdaf12..611c7470670b4528b923dc16569f13d8a3422750 100644 --- a/man/gen.params.gmjmcmc.Rd +++ b/man/gen.params.gmjmcmc.Rd @@ -4,10 +4,10 @@ \alias{gen.params.gmjmcmc} \title{Generate a parameter list for GMJMCMC (Genetically Modified MJMCMC)} \usage{ -gen.params.gmjmcmc(data) +gen.params.gmjmcmc(ncov) } \arguments{ -\item{data}{A data frame containing the dataset with covariates and response variable.} +\item{ncov}{The number of covariates in the dataset that will be used in the algorithm} } \value{ A list of parameters for controlling GMJMCMC behavior: @@ -101,7 +101,7 @@ This function generates the full list of parameters required for the Generalized \examples{ data <- data.frame(y = rnorm(100), x1 = rnorm(100), x2 = rnorm(100)) -params <- gen.params.gmjmcmc(data) +params <- gen.params.gmjmcmc(ncol(data) - 1) str(params) } diff --git a/man/gen.params.mjmcmc.Rd b/man/gen.params.mjmcmc.Rd index 0fe0b17f344a047c1161a6a6a4de4d8ce6f658fb..67f217b22bba71fa326b118e622a7a89e237916b 100644 --- a/man/gen.params.mjmcmc.Rd +++ b/man/gen.params.mjmcmc.Rd @@ -4,10 +4,10 @@ \alias{gen.params.mjmcmc} \title{Generate a parameter list for MJMCMC (Mode Jumping MCMC)} \usage{ -gen.params.mjmcmc(data) +gen.params.mjmcmc(ncov) } \arguments{ -\item{data}{The dataset that will be used in the algorithm} +\item{ncov}{The number of covariates in the dataset that will be used in the algorithm} } \value{ A list of parameters to use when running the mjmcmc function. diff --git a/man/get.best.model.Rd b/man/get.best.model.Rd index 42d8f9f36a38c8d351979b53d9a729ff7c6103da..9b564e519b9825dcccb0e6e7d905c8902e413d43 100644 --- a/man/get.best.model.Rd +++ b/man/get.best.model.Rd @@ -37,7 +37,9 @@ The function identifies the best model by selecting the one with the highest \co } } \examples{ -result <- gmjmcmc(matrix(rnorm(600), 100), P = 2, gaussian.loglik, NULL, c("p0", "exp_dbl")) +result <- gmjmcmc(x = matrix(rnorm(600), 100), +y = matrix(rnorm(100), 100), +P = 2, transforms = c("p0", "exp_dbl")) get.best.model(result) } diff --git a/man/glm.loglik.Rd b/man/glm.loglik.Rd new file mode 100644 index 0000000000000000000000000000000000000000..3cc398db9ead7bb1cb0deefa295adec300438e63 --- /dev/null +++ b/man/glm.loglik.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/likelihoods.R +\name{glm.loglik} +\alias{glm.loglik} +\title{Log likelihood function for glm regression with a Jeffreys parameter prior and BIC approximations of the posterior +This function is created as an example of how to create an estimator that is used +to calculate the marginal likelihood of a model.} +\usage{ +glm.loglik( + y, + x, + model, + complex, + params = list(r = exp(-0.5), family = "Gamma") +) +} +\arguments{ +\item{y}{A vector containing the dependent variable} + +\item{x}{The matrix containing the precalculated features} + +\item{model}{The model to estimate as a logical vector} + +\item{complex}{A list of complexity measures for the features} + +\item{params}{A list of parameters for the log likelihood, supplied by the user, family must be specified} +} +\value{ +A list with the log marginal likelihood combined with the log prior (crit) and the posterior mode of the coefficients (coefs). +} +\description{ +Log likelihood function for glm regression with a Jeffreys parameter prior and BIC approximations of the posterior +This function is created as an example of how to create an estimator that is used +to calculate the marginal likelihood of a model. +} +\examples{ +glm.loglik(abs(rnorm(100))+1, matrix(rnorm(100)), TRUE, list(oc = 1)) + + +} diff --git a/man/glm.loglik2.Rd b/man/glm.loglik2.Rd new file mode 100644 index 0000000000000000000000000000000000000000..8a698702a256dad1909147909ec62c1841188fd9 --- /dev/null +++ b/man/glm.loglik2.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/likelihoods2.R +\name{glm.loglik2} +\alias{glm.loglik2} +\title{Log likelihood function for glm regression with a Jeffreys parameter prior and BIC approximations of the posterior +This function is created as an example of how to create an estimator that is used +to calculate the marginal likelihood of a model.} +\usage{ +glm.loglik2( + y, + x, + model, + complex, + params = list(r = exp(-0.5), family = "Gamma") +) +} +\arguments{ +\item{y}{A vector containing the dependent variable} + +\item{x}{The matrix containing the precalculated features} + +\item{model}{The model to estimate as a logical vector} + +\item{complex}{A list of complexity measures for the features} + +\item{params}{A list of parameters for the log likelihood, supplied by the user, family must be specified} +} +\value{ +A list with the log marginal likelihood combined with the log prior (crit) and the posterior mode of the coefficients (coefs). +} +\description{ +Log likelihood function for glm regression with a Jeffreys parameter prior and BIC approximations of the posterior +This function is created as an example of how to create an estimator that is used +to calculate the marginal likelihood of a model. +} +\examples{ +glm.loglik(abs(rnorm(100))+1, matrix(rnorm(100)), TRUE, list(oc = 1)) + + +} diff --git a/man/glm.logpost.bas.Rd b/man/glm.logpost.bas.Rd index 50eb6254123d6691baca1a4f6b09d4fdf784ede9..87fdcc836aee7a0a1ecd41d932a564adc8d0b704 100644 --- a/man/glm.logpost.bas.Rd +++ b/man/glm.logpost.bas.Rd @@ -11,8 +11,7 @@ glm.logpost.bas( x, model, complex, - params = list(r = exp(-0.5), family = "binomial", prior_beta = Jeffreys(), laplace = - FALSE) + params = list(r = NULL, family = "binomial", beta_prior = Jeffreys(), laplace = FALSE) ) } \arguments{ @@ -35,6 +34,9 @@ This function is created as an example of how to create an estimator that is use to calculate the marginal likelihood of a model. } \examples{ -glm.logpost.bas(as.integer(rnorm(100) > 0),cbind(1,matrix(rnorm(100))),c(TRUE,TRUE),list(oc = 1)) +glm.logpost.bas(as.integer(rnorm(100) > 0), +cbind(1, matrix(rnorm(100))), +c(TRUE, TRUE), +list(oc = 1)) } diff --git a/man/glm.logpost.bas2.Rd b/man/glm.logpost.bas2.Rd new file mode 100644 index 0000000000000000000000000000000000000000..c079449f5f157973792f2f9128bf73e595a374c6 --- /dev/null +++ b/man/glm.logpost.bas2.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/likelihoods2.R +\name{glm.logpost.bas2} +\alias{glm.logpost.bas2} +\title{Log likelihood function for glm regression with parameter priors from BAS package +This function is created as an example of how to create an estimator that is used +to calculate the marginal likelihood of a model.} +\usage{ +glm.logpost.bas2( + y, + x, + model, + complex, + params = list(r = exp(-0.5), family = "binomial", prior_beta = Jeffreys(), laplace = + FALSE) +) +} +\arguments{ +\item{y}{A vector containing the dependent variable} + +\item{x}{The matrix containing the precalculated features} + +\item{model}{The model to estimate as a logical vector} + +\item{complex}{A list of complexity measures for the features} + +\item{params}{A list of parameters for the log likelihood, supplied by the user, important to specify the tuning parameters of beta priors and family that BAS uses in glm models} +} +\value{ +A list with the log marginal likelihood combined with the log prior (crit) and the posterior mode of the coefficients (coefs). +} +\description{ +Log likelihood function for glm regression with parameter priors from BAS package +This function is created as an example of how to create an estimator that is used +to calculate the marginal likelihood of a model. +} +\examples{ +glm.logpost.bas(as.integer(rnorm(100) > 0),cbind(1,matrix(rnorm(100))),c(TRUE,TRUE),list(oc = 1)) + +} diff --git a/man/gmjmcmc.Rd b/man/gmjmcmc.Rd index 471e01637e2275b6307ea6fe878a89860268a5cc..45d9c3054f0c09b2766b7563d76a275c990741a5 100644 --- a/man/gmjmcmc.Rd +++ b/man/gmjmcmc.Rd @@ -5,10 +5,14 @@ \title{Main algorithm for GMJMCMC (Genetically Modified MJMCMC)} \usage{ gmjmcmc( - data, - loglik.pi = gaussian.loglik, + x, + y, + loglik.pi = fbms.mlik.master, loglik.alpha = gaussian.loglik.alpha, + mlpost_params = list(family = "gaussian", beta_prior = list(type = "robust")), transforms, + intercept = TRUE, + fixed = 0, P = 10, N.init = 100, N.final = 100, @@ -19,17 +23,23 @@ gmjmcmc( ) } \arguments{ -\item{data}{A matrix containing the data to use in the algorithm, -first column should be the dependent variable, -and the rest of the columns should be the independent variables.} +\item{x}{matrix containing the design matrix with data to use in the algorithm,} + +\item{y}{response variable} \item{loglik.pi}{The (log) density to explore} \item{loglik.alpha}{The likelihood function to use for alpha calculation} +\item{mlpost_params}{parameters for the estimator function loglik.pi} + \item{transforms}{A Character vector including the names of the non-linear functions to be used by the modification and the projection operator.} +\item{intercept}{whether intercept should be added to the design matrix (no model selection for intercept)} + +\item{fixed}{how many of the first columns of the design matrix will always be included in the models} + \item{P}{The number of generations for GMJMCMC (Genetically Modified MJMCMC). The default value is $P = 10$. A larger value like $P = 50$ might be more realistic for more complicated examples where one expects a lot of non-linear structures.} @@ -63,7 +73,10 @@ A list containing the following elements: Main algorithm for GMJMCMC (Genetically Modified MJMCMC) } \examples{ -result <- gmjmcmc(matrix(rnorm(600), 100), P = 2, gaussian.loglik, NULL, c("p0", "exp_dbl")) +result <- gmjmcmc(y = matrix(rnorm(100), 100), +x = matrix(rnorm(600), 100), +P = 2, +transform = c("p0", "exp_dbl")) summary(result) plot(result) diff --git a/man/gmjmcmc.parallel.Rd b/man/gmjmcmc.parallel.Rd index d51130d64e1e1a8c4d818b48e9589c0fbc6019e9..6efa65c049df56eaafd1bc232bdcf5764a12e9ea 100644 --- a/man/gmjmcmc.parallel.Rd +++ b/man/gmjmcmc.parallel.Rd @@ -8,9 +8,11 @@ gmjmcmc.parallel( runs = 2, cores = getOption("mc.cores", 2L), merge.options = list(populations = "best", complex.measure = 2, tol = 1e-07), - data, + x, + y, loglik.pi = gaussian.loglik, loglik.alpha = gaussian.loglik.alpha, + mlpost_params = NULL, transforms, ... ) @@ -22,14 +24,16 @@ gmjmcmc.parallel( \item{merge.options}{A list of options to pass to the \code{\link[=merge_results]{merge_results()}} function run after the} -\item{data}{A matrix containing the data to use in the algorithm, -first column should be the dependent variable, -and the rest of the columns should be the independent variables.} +\item{x}{matrix containing the design matrix with data to use in the algorithm,} + +\item{y}{response variable} \item{loglik.pi}{The (log) density to explore} \item{loglik.alpha}{The likelihood function to use for alpha calculation} +\item{mlpost_params}{parameters for the estimator function loglik.pi} + \item{transforms}{A Character vector including the names of the non-linear functions to be used by the modification and the projection operator.} @@ -45,12 +49,10 @@ Run multiple gmjmcmc (Genetically Modified MJMCMC) runs in parallel returning a result <- gmjmcmc.parallel( runs = 1, cores = 1, - list(populations = "best", complex.measure = 2, tol = 0.0000001), - matrix(rnorm(600), 100), - P = 2, - gaussian.loglik, - loglik.alpha = gaussian.loglik.alpha, - c("p0", "exp_dbl") + loglik.pi = FBMS::gaussian.loglik, + y = matrix(rnorm(100), 100), + x = matrix(rnorm(600), 100), + transforms = c("p0", "exp_dbl") ) summary(result) diff --git a/man/lm.logpost.bas.Rd b/man/lm.logpost.bas.Rd index 29066e860f86417c33c8b6fa186872823b029fe6..61d176d5e9d96e34ad91db4bf117b221cc5aa3a6 100644 --- a/man/lm.logpost.bas.Rd +++ b/man/lm.logpost.bas.Rd @@ -11,7 +11,7 @@ lm.logpost.bas( x, model, complex, - params = list(r = exp(-0.5), prior_beta = "g-prior", alpha = 4) + params = list(r = exp(-0.5), beta_prior = list(method = 1)) ) } \arguments{ @@ -23,7 +23,7 @@ lm.logpost.bas( \item{complex}{A list of complexity measures for the features} -\item{params}{A list of parameters for the log likelihood, supplied by the user, important to specify the tuning parameters of beta priors and in Gaussian models} +\item{params}{A list of parameters for the log likelihood, supplied by the user, important to specify the tuning parameters of beta priors where the corresponding integers as prior_beta must be provided "g-prior" = 0, "hyper-g" = 1, "EB-local" = 2, "BIC" = 3, "ZS-null" = 4, "ZS-full" = 5, "hyper-g-laplace" = 6, "AIC" = 7, "EB-global" = 2, "hyper-g-n" = 8, "JZS" = 9 and in Gaussian models} } \value{ A list with the log marginal likelihood combined with the log prior (crit) and the posterior mode of the coefficients (coefs). diff --git a/man/lm.logpost.bas2.Rd b/man/lm.logpost.bas2.Rd new file mode 100644 index 0000000000000000000000000000000000000000..e04283e5f1a025f4659cd35aec502ee53165569c --- /dev/null +++ b/man/lm.logpost.bas2.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/likelihoods2.R +\name{lm.logpost.bas2} +\alias{lm.logpost.bas2} +\title{Log likelihood function for Gaussian regression with parameter priors from BAS package +This function is created as an example of how to create an estimator that is used +to calculate the marginal likelihood of a model.} +\usage{ +lm.logpost.bas2( + y, + x, + model, + complex, + params = list(r = exp(-0.5), prior_beta = "g-prior", alpha = 4) +) +} +\arguments{ +\item{y}{A vector containing the dependent variable} + +\item{x}{The matrix containing the precalculated features} + +\item{model}{The model to estimate as a logical vector} + +\item{complex}{A list of complexity measures for the features} + +\item{params}{A list of parameters for the log likelihood, supplied by the user, important to specify the tuning parameters of beta priors where the corresponding integers as prior_beta must be provided "g-prior" = 0, "hyper-g" = 1, "EB-local" = 2, "BIC" = 3, "ZS-null" = 4, "ZS-full" = 5, "hyper-g-laplace" = 6, "AIC" = 7, "EB-global" = 2, "hyper-g-n" = 8, "JZS" = 9 and in Gaussian models} +} +\value{ +A list with the log marginal likelihood combined with the log prior (crit) and the posterior mode of the coefficients (coefs). +} +\description{ +Log likelihood function for Gaussian regression with parameter priors from BAS package +This function is created as an example of how to create an estimator that is used +to calculate the marginal likelihood of a model. +} +\examples{ +lm.logpost.bas(rnorm(100), cbind(1,matrix(rnorm(100))), c(TRUE,TRUE), list(oc = 1)) + + +} diff --git a/man/log_prior.Rd b/man/log_prior.Rd index 80a87a1d4b15c93927d684a867de88581ba9f1fb..1fb5950977f820f5bbb658a41749a5f3a8629ace 100644 --- a/man/log_prior.Rd +++ b/man/log_prior.Rd @@ -1,9 +1,11 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/likelihoods.R +% Please edit documentation in R/likelihoods.R, R/likelihoods2.R \name{log_prior} \alias{log_prior} \title{Log model prior function} \usage{ +log_prior(params, complex) + log_prior(params, complex) } \arguments{ @@ -12,12 +14,18 @@ log_prior(params, complex) \item{complex}{list of complexity measures of the features included into the model} } \value{ +A numeric with the log model prior. + A numeric with the log model prior. } \description{ +Log model prior function + Log model prior function } \examples{ log_prior(params = list(r=2), complex = list(oc = 2)) +log_prior(params = list(r=2), complex = list(oc = 2)) + } diff --git a/man/logistic.loglik2.Rd b/man/logistic.loglik2.Rd new file mode 100644 index 0000000000000000000000000000000000000000..c9cc3c8178d9fcfee6885b35056edb3685071575 --- /dev/null +++ b/man/logistic.loglik2.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/likelihoods2.R +\name{logistic.loglik2} +\alias{logistic.loglik2} +\title{Log likelihood function for logistic regression with a Jeffreys parameter prior and BIC approximations of the posterior +This function is created as an example of how to create an estimator that is used +to calculate the marginal likelihood of a model.} +\usage{ +logistic.loglik2(y, x, model, complex, params = list(r = exp(-0.5))) +} +\arguments{ +\item{y}{A vector containing the dependent variable} + +\item{x}{The matrix containing the precalculated features} + +\item{model}{The model to estimate as a logical vector} + +\item{complex}{A list of complexity measures for the features} + +\item{params}{A list of parameters for the log likelihood, supplied by the user} +} +\value{ +A list with the log marginal likelihood combined with the log prior (crit) and the posterior mode of the coefficients (coefs). +} +\description{ +Log likelihood function for logistic regression with a Jeffreys parameter prior and BIC approximations of the posterior +This function is created as an example of how to create an estimator that is used +to calculate the marginal likelihood of a model. +} +\examples{ +logistic.loglik2(as.integer(rnorm(100) > 0), matrix(rnorm(100)), TRUE, list(oc = 1)) + + +} diff --git a/man/logistic.loglik2.ala.Rd b/man/logistic.loglik2.ala.Rd new file mode 100644 index 0000000000000000000000000000000000000000..190bd2fa13ac99abe7e71dea31570b4f78451ad2 --- /dev/null +++ b/man/logistic.loglik2.ala.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/likelihoods2.R +\name{logistic.loglik2.ala} +\alias{logistic.loglik2.ala} +\title{Log likelihood function for logistic regression with an approximate Laplace approximations used +This function is created as an example of how to create an estimator that is used +to calculate the marginal likelihood of a model.} +\usage{ +logistic.loglik2.ala(y, x, model, complex, params = list(r = exp(-0.5))) +} +\arguments{ +\item{y}{A vector containing the dependent variable} + +\item{x}{The matrix containing the precalculated features} + +\item{model}{The model to estimate as a logical vector} + +\item{complex}{A list of complexity measures for the features} + +\item{params}{A list of parameters for the log likelihood, supplied by the user} +} +\value{ +A list with the log marginal likelihood combined with the log prior (crit) and the posterior mode of the coefficients (coefs). +} +\description{ +Log likelihood function for logistic regression with an approximate Laplace approximations used +This function is created as an example of how to create an estimator that is used +to calculate the marginal likelihood of a model. +} +\examples{ +logistic.loglik2.ala(as.integer(rnorm(100) > 0), matrix(rnorm(100)), TRUE, list(oc = 1)) + + +} diff --git a/man/logistic.loglik2.alpha.Rd b/man/logistic.loglik2.alpha.Rd new file mode 100644 index 0000000000000000000000000000000000000000..56caadc1e5604957b1d90b6e0e961668c0059c91 --- /dev/null +++ b/man/logistic.loglik2.alpha.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/likelihoods2.R +\name{logistic.loglik2.alpha} +\alias{logistic.loglik2.alpha} +\title{Log likelihood function for logistic regression for alpha calculation +This function is just the bare likelihood function} +\usage{ +logistic.loglik2.alpha(a, data, mu_func) +} +\arguments{ +\item{a}{A vector of the alphas to be used} + +\item{data}{The data to be used for calculation} + +\item{mu_func}{The function linking the mean to the covariates, +as a string with the alphas as a[i].} +} +\value{ +A numeric with the log likelihood. +} +\description{ +Log likelihood function for logistic regression for alpha calculation +This function is just the bare likelihood function +} diff --git a/man/marginal.probs.Rd b/man/marginal.probs.Rd index 53df969d500b4049e6bc8139bca5641e9b423313..cf927ecd2dc5366d1493bf33f92cbcdd17d1b296 100644 --- a/man/marginal.probs.Rd +++ b/man/marginal.probs.Rd @@ -16,7 +16,10 @@ A numeric vector of marginal model probabilities based on relative frequencies o Function for calculating marginal inclusion probabilities of features given a list of models } \examples{ -result <- gmjmcmc(matrix(rnorm(600), 100), P = 2, gaussian.loglik, NULL, c("p0", "exp_dbl")) +result <- gmjmcmc(x = matrix(rnorm(600), 100), +y = matrix(rnorm(100), 100), +P = 2, +transforms = c("p0", "exp_dbl")) marginal.probs(result$models[[1]]) } diff --git a/man/merge_results.Rd b/man/merge_results.Rd index c0a89ab7cdbc090677857a6c1ea028f6ae48d2a2..32c10f606cfbb0c9880ad10d1371c36ea9b8ad82 100644 --- a/man/merge_results.Rd +++ b/man/merge_results.Rd @@ -50,12 +50,9 @@ and merge the results together, simplifying by merging equivalent features (havi result <- gmjmcmc.parallel( runs = 1, cores = 1, - list(populations = "best", complex.measure = 2, tol = 0.0000001), - matrix(rnorm(600), 100), + y = matrix(rnorm(100), 100),x = matrix(rnorm(600), 100), P = 2, - gaussian.loglik, - loglik.alpha = gaussian.loglik.alpha, - c("p0", "exp_dbl") + transforms = c("p0", "exp_dbl") ) summary(result) diff --git a/man/mjmcmc.Rd b/man/mjmcmc.Rd index 9d2fe5892e85a89751fcabf21ffe98bd26ba93a1..064f3f587cafb3a5df8db428510e15b28c69a738 100644 --- a/man/mjmcmc.Rd +++ b/man/mjmcmc.Rd @@ -5,8 +5,12 @@ \title{Main algorithm for MJMCMC (Genetically Modified MJMCMC)} \usage{ mjmcmc( - data, - loglik.pi = gaussian.loglik, + x, + y, + loglik.pi = fbms.mlik.master, + mlpost_params = list(family = "gaussian", beta_prior = list(type = "g-prior")), + intercept = TRUE, + fixed = 0, N = 100, probs = NULL, params = NULL, @@ -15,12 +19,18 @@ mjmcmc( ) } \arguments{ -\item{data}{A matrix containing the data to use in the algorithm, -first column should be the dependent variable, -and the rest of the columns should be the independent variables.} +\item{x}{matrix containing the design matrix with data to use in the algorithm,} + +\item{y}{response variable} \item{loglik.pi}{The (log) density to explore} +\item{mlpost_params}{parameters for the estimator function loglik.pi} + +\item{intercept}{whether intercept should be added to the design matrix (no model selection for intercept)} + +\item{fixed}{how many of the first columns of the design matrix will always be included in the models} + \item{N}{The number of iterations to run for} \item{probs}{A list of the various probability vectors to use} @@ -46,7 +56,7 @@ A list containing the following elements: Main algorithm for MJMCMC (Genetically Modified MJMCMC) } \examples{ -result <- mjmcmc(matrix(rnorm(600), 100), gaussian.loglik) +result <- mjmcmc(y = matrix(rnorm(100), 100),x = matrix(rnorm(600), 100), gaussian.loglik) summary(result) plot(result) diff --git a/man/mjmcmc.parallel.Rd b/man/mjmcmc.parallel.Rd index 756ce4f86cac4a8089b9ad715a64c0217bcd1692..f25fab99e9c93bc3c0bc24289c4e4ab8583b2428 100644 --- a/man/mjmcmc.parallel.Rd +++ b/man/mjmcmc.parallel.Rd @@ -20,7 +20,11 @@ Merged results from multiple mjmcmc runs Run multiple mjmcmc runs in parallel, merging the results before returning. } \examples{ -result <- mjmcmc.parallel(runs = 1, cores = 1, matrix(rnorm(600), 100), gaussian.loglik) +result <- mjmcmc.parallel(runs = 1, +cores = 1, +loglik.pi = FBMS::gaussian.loglik, +y = matrix(rnorm(100), 100), +x = matrix(rnorm(600), 100)) summary(result) plot(result) diff --git a/man/model.string.Rd b/man/model.string.Rd index 7a3e8bea07b4a8a6580d1c5f0038895ffa4a5d83..d2769d1c34e4efce2d4d72fe50d2e47a55fc2478 100644 --- a/man/model.string.Rd +++ b/man/model.string.Rd @@ -22,7 +22,9 @@ A character representation of a model Function to generate a function string for a model consisting of features } \examples{ -result <- gmjmcmc(matrix(rnorm(600), 100), P = 2, gaussian.loglik, NULL, c("p0", "exp_dbl")) +result <- gmjmcmc(y = matrix(rnorm(100), 100), +x = matrix(rnorm(600), 100), +P = 2, transforms = c("p0", "exp_dbl")) summary(result) plot(result) model.string(c(TRUE, FALSE, TRUE, FALSE, TRUE), result$populations[[1]]) diff --git a/man/plot.gmjmcmc.Rd b/man/plot.gmjmcmc.Rd index ff2fd5def39c5153c5352207017db7046691c8c2..8e0e88cca03bc15bcdd212a8181b6f00e0133881 100644 --- a/man/plot.gmjmcmc.Rd +++ b/man/plot.gmjmcmc.Rd @@ -28,7 +28,10 @@ Function to plot the results, works both for results from gmjmcmc and merged results from merge.results } \examples{ -result <- gmjmcmc(matrix(rnorm(600), 100), P = 2, gaussian.loglik, NULL, c("p0", "exp_dbl")) +result <- gmjmcmc(y = matrix(rnorm(100), 100), +x = matrix(rnorm(600), 100), +P = 2, +transforms = c("p0", "exp_dbl")) plot(result) diff --git a/man/plot.gmjmcmc_merged.Rd b/man/plot.gmjmcmc_merged.Rd index f2aa5c6cf6a8c57b168a075da8dd3d193c5b2e3e..717fa76efb24f49d694fe47745966bddea7b1592 100644 --- a/man/plot.gmjmcmc_merged.Rd +++ b/man/plot.gmjmcmc_merged.Rd @@ -29,12 +29,10 @@ Plot a gmjmcmc_merged run result <- gmjmcmc.parallel( runs = 1, cores = 1, - list(populations = "best", complex.measure = 2, tol = 0.0000001), - matrix(rnorm(600), 100), + y = matrix(rnorm(100), 100), + x = matrix(rnorm(600), 100), P = 2, - gaussian.loglik, - loglik.alpha = gaussian.loglik.alpha, - c("p0", "exp_dbl") + transforms = c("p0", "exp_dbl") ) plot(result) diff --git a/man/plot.mjmcmc.Rd b/man/plot.mjmcmc.Rd index 050e495b8836ed73d711214d01087ac2d8cf5b81..6997792d7301ecc41b6e3b8ddb595137002dd0f9 100644 --- a/man/plot.mjmcmc.Rd +++ b/man/plot.mjmcmc.Rd @@ -22,7 +22,7 @@ Function to plot the results, works both for results from gmjmcmc and merged results from merge.results } \examples{ -result <- mjmcmc(matrix(rnorm(600), 100), gaussian.loglik) +result <- mjmcmc(y = matrix(rnorm(100), 100),x = matrix(rnorm(600), 100), gaussian.loglik) plot(result) } diff --git a/man/plot.mjmcmc_parallel.Rd b/man/plot.mjmcmc_parallel.Rd index 0a11c9258b86f1217c4264da86c4c495552f67ff..f957c5959d0d9740b17e5ed3cd8f61fa3faf6bb7 100644 --- a/man/plot.mjmcmc_parallel.Rd +++ b/man/plot.mjmcmc_parallel.Rd @@ -20,7 +20,11 @@ No return value, just creates a plot Plot a mjmcmc_parallel run } \examples{ -result <- mjmcmc.parallel(runs = 1, cores = 1, matrix(rnorm(600), 100), gaussian.loglik) +result <- mjmcmc.parallel(runs = 1, +cores = 1, +y = matrix(rnorm(100), 100), +x = matrix(rnorm(600), 100), +gaussian.loglik) plot(result) } diff --git a/man/predict.gmjmcmc.Rd b/man/predict.gmjmcmc.Rd index f154a4bc473412ef0057a8def9c2f46aa721a86e..58c08b12aa6e2b4ca48828c482fdeccdfdaaf6fb 100644 --- a/man/predict.gmjmcmc.Rd +++ b/man/predict.gmjmcmc.Rd @@ -39,11 +39,10 @@ Predict using a gmjmcmc result object. } \examples{ result <- gmjmcmc( - matrix(rnorm(600), 100), + x = matrix(rnorm(600), 100), + y = matrix(rnorm(100), 100), P = 2, - gaussian.loglik, - loglik.alpha = gaussian.loglik.alpha, - c("p0", "exp_dbl") + transforms = c("p0", "exp_dbl") ) preds <- predict(result, matrix(rnorm(600), 100)) diff --git a/man/predict.gmjmcmc_merged.Rd b/man/predict.gmjmcmc_merged.Rd index 5679e4ca62dff6ee12940207a43eda04a7f21f85..c698aa75181d325a72896c875222af4d5e3fd9ac 100644 --- a/man/predict.gmjmcmc_merged.Rd +++ b/man/predict.gmjmcmc_merged.Rd @@ -42,11 +42,10 @@ result <- gmjmcmc.parallel( runs = 1, cores = 1, list(populations = "best", complex.measure = 2, tol = 0.0000001), - matrix(rnorm(600), 100), + x = matrix(rnorm(600), 100), + y = matrix(rnorm(100), 100), P = 2, - gaussian.loglik, - loglik.alpha = gaussian.loglik.alpha, - c("p0", "exp_dbl") + transforms = c("p0", "exp_dbl") ) preds <- predict(result, matrix(rnorm(600), 100)) diff --git a/man/predict.gmjmcmc_parallel.Rd b/man/predict.gmjmcmc_parallel.Rd index f2dd556cb2fb3ef8efa7ba58b7048ac2f2c87e78..b78935b629e62b8555f9f3e5709999fadf12e579 100644 --- a/man/predict.gmjmcmc_parallel.Rd +++ b/man/predict.gmjmcmc_parallel.Rd @@ -30,12 +30,11 @@ result <- gmjmcmc.parallel( runs = 1, cores = 1, list(populations = "best", complex.measure = 2, tol = 0.0000001), - matrix(rnorm(600), 100), + x = matrix(rnorm(600), 100), + y = matrix(rnorm(100), 100), P = 2, - gaussian.loglik, - loglik.alpha = gaussian.loglik.alpha, - c("p0", "exp_dbl") + transforms = c("p0", "exp_dbl") ) -preds <- predict(result$results, matrix(rnorm(600), 100)) +preds <- predict(result, matrix(rnorm(600), 100)) } diff --git a/man/predict.mjmcmc.Rd b/man/predict.mjmcmc.Rd index 8d6c962a37054782bbb988e93ab6becee916aa28..f36afb775585eca0dafef74b5ce4e62e961feff3 100644 --- a/man/predict.mjmcmc.Rd +++ b/man/predict.mjmcmc.Rd @@ -26,7 +26,7 @@ A list containing aggregated predictions. Predict using a mjmcmc result object. } \examples{ -result <- mjmcmc(matrix(rnorm(600), 100), gaussian.loglik) -preds <- predict(result, matrix(rnorm(500), 100)) +result <- mjmcmc(x = matrix(rnorm(600), 100),y = matrix(rnorm(100), 100), gaussian.loglik) +preds <- predict(result, matrix(rnorm(600), 100)) } diff --git a/man/predict.mjmcmc_parallel.Rd b/man/predict.mjmcmc_parallel.Rd index cbacd449b801b581bb5cb9a7a364fb53b7a5248c..ba5a9f8eed8b8b8903bf6552156ba63046e8ba5f 100644 --- a/man/predict.mjmcmc_parallel.Rd +++ b/man/predict.mjmcmc_parallel.Rd @@ -26,7 +26,11 @@ A list containing aggregated predictions. Predict using a mjmcmc result object from a parallel run. } \examples{ -result <- mjmcmc.parallel(runs = 1, cores = 1, matrix(rnorm(600), 100), gaussian.loglik) -preds <- predict(result, matrix(rnorm(500), 100)) +result <- mjmcmc.parallel(runs = 1, +cores = 1, +x = matrix(rnorm(600), 100), +y = matrix(rnorm(100), 100), +gaussian.loglik) +preds <- predict(result, matrix(rnorm(600), 100)) } diff --git a/man/print.feature.Rd b/man/print.feature.Rd index 06eb37a72bc75ad51cc1201f5fed4f82acefba24..4ad803c214a05310d271bb77d95fc8e09e3562c2 100644 --- a/man/print.feature.Rd +++ b/man/print.feature.Rd @@ -4,13 +4,23 @@ \alias{print.feature} \title{Print method for "feature" class} \usage{ -\method{print}{feature}(x, dataset = FALSE, alphas = FALSE, labels = FALSE, round = FALSE, ...) +\method{print}{feature}( + x, + dataset = FALSE, + fixed = 0, + alphas = FALSE, + labels = FALSE, + round = FALSE, + ... +) } \arguments{ \item{x}{An object of class "feature"} \item{dataset}{Set the regular covariates as columns in a dataset} +\item{fixed}{How many of the first columns in dataset are fixed and do not contribute to variable selection} + \item{alphas}{Print a "?" instead of actual alphas to prepare the output for alpha estimation} \item{labels}{Should the covariates be named, or just referred to as their place in the data.frame.} @@ -26,7 +36,10 @@ String representation of a feature Print method for "feature" class } \examples{ -result <- gmjmcmc(matrix(rnorm(600), 100), P = 2, gaussian.loglik, NULL, c("p0", "exp_dbl")) +result <- gmjmcmc(x = matrix(rnorm(600), 100), +y = matrix(rnorm(100), 100), +P = 2, +transforms = c("p0", "exp_dbl")) print(result$populations[[1]][1]) } diff --git a/man/string.population.Rd b/man/string.population.Rd index 86c6d8165831b9cdc5121fac9af0f1ad39fc9e85..fb6c21bc66790e25985f05e08ddb4a601f2d6af0 100644 --- a/man/string.population.Rd +++ b/man/string.population.Rd @@ -18,7 +18,10 @@ A matrix of character representations of the features of a model. Function to get a character representation of a list of features } \examples{ -result <- gmjmcmc(matrix(rnorm(600), 100), P = 2, gaussian.loglik, NULL, c("p0", "exp_dbl")) +result <- gmjmcmc(y = matrix(rnorm(100), 100), +x = matrix(rnorm(600), 100), +P = 2, +transforms = c("p0", "exp_dbl")) string.population(result$populations[[1]]) } diff --git a/man/string.population.models.Rd b/man/string.population.models.Rd index 5c6c70e2174f1c132f254b758814e8fba7a0a28a..f287ac7879f2c9a7d43b17fcbbe882ac020ee84b 100644 --- a/man/string.population.models.Rd +++ b/man/string.population.models.Rd @@ -22,7 +22,10 @@ A matrix of character representations of a list of models. Function to get a character representation of a list of models } \examples{ -result <- gmjmcmc(matrix(rnorm(600), 100), P = 2, gaussian.loglik, NULL, c("p0", "exp_dbl")) +result <- gmjmcmc(y = matrix(rnorm(100), 100), +x = matrix(rnorm(600), 100), +P = 2, +transforms = c("p0", "exp_dbl")) string.population.models(result$populations[[2]], result$models[[2]]) } diff --git a/man/summary.gmjmcmc.Rd b/man/summary.gmjmcmc.Rd index ca112c751666cc9e020af4c82f9ea1d032c97684..bc1d172e4359adc284a9401a8e7b9725458d2232 100644 --- a/man/summary.gmjmcmc.Rd +++ b/man/summary.gmjmcmc.Rd @@ -41,7 +41,10 @@ A data frame containing the following columns: Function to print a quick summary of the results } \examples{ -result <- gmjmcmc(matrix(rnorm(600), 100), P = 2, gaussian.loglik, NULL, c("p0", "exp_dbl")) +result <- gmjmcmc(y = matrix(rnorm(100), 100), +x = matrix(rnorm(600), 100), +P = 2, +transforms = c("p0", "exp_dbl")) summary(result, pop = "best") } diff --git a/man/summary.gmjmcmc_merged.Rd b/man/summary.gmjmcmc_merged.Rd index b979eb95373885090f8b43a25bdcd92a44312b12..351a9f40dc6522878dbd9713c195a6371263bac6 100644 --- a/man/summary.gmjmcmc_merged.Rd +++ b/man/summary.gmjmcmc_merged.Rd @@ -45,11 +45,10 @@ result <- gmjmcmc.parallel( runs = 1, cores = 1, list(populations = "best", complex.measure = 2, tol = 0.0000001), - matrix(rnorm(600), 100), + y = matrix(rnorm(100), 100), + x = matrix(rnorm(600), 100), P = 2, - gaussian.loglik, - loglik.alpha = gaussian.loglik.alpha, - c("p0", "exp_dbl") + transforms = c("p0", "exp_dbl") ) summary(result) diff --git a/man/summary.mjmcmc.Rd b/man/summary.mjmcmc.Rd index fde06a678d0ffe0850346596f60d33b3b908f819..8c11ceed76c8093c05a2abc1de4c83215144555a 100644 --- a/man/summary.mjmcmc.Rd +++ b/man/summary.mjmcmc.Rd @@ -35,7 +35,9 @@ A data frame containing the following columns: Function to print a quick summary of the results } \examples{ -result <- mjmcmc(matrix(rnorm(600), 100), gaussian.loglik) +result <- mjmcmc(y = matrix(rnorm(100), 100), +x = matrix(rnorm(600), 100), +gaussian.loglik) summary(result) } diff --git a/man/summary.mjmcmc_parallel.Rd b/man/summary.mjmcmc_parallel.Rd index fad3f4a63a936848ec192908256961c1ab97edfd..2c77b2a2054d41be69d905115f07dd28c4b158d5 100644 --- a/man/summary.mjmcmc_parallel.Rd +++ b/man/summary.mjmcmc_parallel.Rd @@ -35,7 +35,11 @@ A data frame containing the following columns: Function to print a quick summary of the results } \examples{ -result <- mjmcmc.parallel(runs = 1, cores = 1, matrix(rnorm(600), 100), gaussian.loglik) +result <- mjmcmc.parallel(runs = 1, +cores = 1, +y = matrix(rnorm(100), 100), +x = matrix(rnorm(600), 100), +gaussian.loglik) summary(result) } diff --git a/tests/testthat/test_fbms.R b/tests/testthat/test_fbms.R new file mode 100644 index 0000000000000000000000000000000000000000..51f9e0ceb9eb02536512ed32652828ea109ecde3 --- /dev/null +++ b/tests/testthat/test_fbms.R @@ -0,0 +1,96 @@ +# Title : General tests +# Objective : Test the code +# Created by: jonlachmann +# Created on: 2021-02-25 + +context("MJMCMC") + +test_that("Test (G)MJMCMC through the fbms function", { + set.seed(123) + x <- matrix(rnorm(300), 100) + y <- rnorm(100, 0, 0.5) + rowSums(x[, 1:2]) + y_shift <- y + 10 + y_sin <- rnorm(100, 0, 0.05) + sin(x[, 1]) * 3 + sin(x[, 2]) * 2 + y_sin_shift <- y_sin + 10 + + validate.model <- function (model, x, y) { + expect_true(all(c(model$marg.probs[1:2] > 0.9, model$marg.probs[3] < 0.9))) + summary <- summary(model, labels = c("a", "b", "c"), tol = -1) + expect_true(all(c(summary$marg.probs[1:2] > 0.9, summary$marg.probs[3] < 0.9))) + plot(model) + pred <- predict(model, x) + # Handle paralell runs + if (!is.null(pred$aggr)) { + pred <- pred$aggr + } + rmse <- sqrt(mean((pred$mean - y)^2)) + expect_true(rmse < 0.6) + best_model <- get.best.model(model) + mpm_model <- get.mpm.model(model, y, x) + } + + validate.gmodel <- function (model, x, y) { + suppressMessages(summary <- summary(model, labels = c("a", "b", "c"), tol = -1)) + expect_true(all(c(summary$marg.probs[1:2] > 0.9, summary$marg.probs[-(1:2)] < 0.9))) + expect_true(all(summary$feats.strings[1:2] %in% c("sin(a)", "sin(b)"))) + plot(model) + pred <- predict(model, x) + # Handle paralell runs + if (!is.null(pred$aggr)) { + pred <- pred$aggr + } + rmse <- sqrt(mean((pred$mean - y)^2)) + expect_true(rmse < 0.2) + best_model <- get.best.model(model) + mpm_model <- get.mpm.model(model, y, x) + } + + params <- gen.params.gmjmcmc(ncol(x)) + probs <- gen.probs.gmjmcmc("sin") + probs$gen <- c(0, 1, 0, 0) + params$feat$D <- 1 + params$feat$L <- 2 + + # No intercept + data <- as.data.frame(cbind(y, x)) + colnames(data) <- c("y", "a", "b", "c") + mod1 <- fbms(y ~ . - 1, family = "gaussian", beta_prior = list(type = "Jeffreys-BIC"), method = "mjmcmc", data = data, verbose = FALSE) + mod1p <- fbms(y ~ . - 1, family = "gaussian", beta_prior = list(type = "Jeffreys-BIC"), method = "mjmcmc.parallel", data = data, verbose = FALSE) + validate.model(mod1, x, y) + validate.model(mod1p, x, y) + + data$y <- y_sin + gmod1 <- fbms(y ~ . - 1, family = "gaussian", beta_prior = list(type = "Jeffreys-BIC"), method = "gmjmcmc", data = data, transforms = "sin", params = params, probs = probs, P = 20, verbose = FALSE) + gmod1p <- fbms(y ~ . - 1, family = "gaussian", beta_prior = list(type = "Jeffreys-BIC"), method = "gmjmcmc.parallel", data = data, transforms = "sin", params = params, probs = probs, P = 20, verbose = FALSE) + validate.gmodel(gmod1, x, y_sin) + validate.gmodel(gmod1p, x, y_sin) + + # Model defined intercept + data$y <- y_shift + mod2 <- fbms(y ~ ., family = "gaussian", beta_prior = list(type = "Jeffreys-BIC"), method = "mjmcmc", data = data, verbose = FALSE) + mod2p <- fbms(y ~ ., family = "gaussian", beta_prior = list(type = "Jeffreys-BIC"), method = "mjmcmc.parallel", data = data, verbose = FALSE) + validate.model(mod2, x, y_shift) + validate.model(mod2p, x, y_shift) + + data$y <- y_sin_shift + gmod2 <- fbms(y ~ ., family = "gaussian", beta_prior = list(type = "Jeffreys-BIC"), method = "gmjmcmc", data = data, transforms = "sin", params = params, probs = probs, P = 20, verbose = FALSE) + gmod2p <- fbms(y ~ ., family = "gaussian", beta_prior = list(type = "Jeffreys-BIC"), method = "gmjmcmc.parallel", data = data, transforms = "sin", params = params, probs = probs, P = 20, verbose = FALSE) + validate.gmodel(gmod2, x, y_sin_shift) + validate.gmodel(gmod2p, x, y_sin_shift) + + # User defined intercept + data <- cbind(data[, 1], 1, data[, -1]) + colnames(data) <- c("y", "const", "a", "b", "c") + data$y <- y_shift + mod3 <- fbms(y ~ . - 1, family = "gaussian", beta_prior = list(type = "Jeffreys-BIC"), method = "mjmcmc", data = data, fixed = 1, verbose = FALSE) + mod3p <- fbms(y ~ . - 1, family = "gaussian", beta_prior = list(type = "Jeffreys-BIC"), method = "mjmcmc.parallel", data = data, fixed = 1, verbose = FALSE) + validate.model(mod3, cbind(1, x), y_shift) + validate.model(mod3p, cbind(1, x), y_shift) + + data$y <- y_sin_shift + gmod3 <- fbms(y ~ . - 1, family = "gaussian", beta_prior = list(type = "Jeffreys-BIC"), method = "gmjmcmc", data = data, transforms = "sin", params = params, probs = probs, P = 20, fixed = 1, verbose = FALSE) + gmod3p <- fbms(y ~ . - 1, family = "gaussian", beta_prior = list(type = "Jeffreys-BIC"), method = "gmjmcmc.parallel", data = data, transforms = "sin", params = params, probs = probs, P = 20, fixed = 1, verbose = FALSE) + validate.gmodel(gmod3, cbind(1, x), y_sin_shift) + validate.gmodel(gmod3p, cbind(1, x), y_sin_shift) +}) + diff --git a/tests/testthat/test_local_optim.R b/tests/testthat/test_local_optim.R index 76357d99321ef65867bf8404d24c6578368cdab4..76bdf390ff21bc5d7edf927b22a9debbadbc93e9 100644 --- a/tests/testthat/test_local_optim.R +++ b/tests/testthat/test_local_optim.R @@ -17,7 +17,7 @@ test_that("Testing Greedy algorithm", { # Optimize empty model but dont allow all indices, should set all to true except disallowed optmod <- greedy.optim( c(F, F, F, F, F, F, F, F, F, F), - NULL, + list(fixed = 0), loglik.tester, c(F, F, T, T, T, T, T, T, T, T), NULL, diff --git a/tests/testthat/test_mjmcmc.R b/tests/testthat/test_mjmcmc.R index 0257d461949927a75e7b243c6ab60cf71d017b9e..a317546b110decd84752081aa40feaa94da6f89e 100644 --- a/tests/testthat/test_mjmcmc.R +++ b/tests/testthat/test_mjmcmc.R @@ -5,50 +5,80 @@ context("MJMCMC") -test_that("Testing MJMCMC algorithm", { - # Dummy test likelihood function - loglik.tester <- function (y, x, model, complex, params) { - lmmod <- lm.fit(x[, model, drop = FALSE], y) - n <- length(y) - k <- length(lmmod$coefficients) - rss <- sum(resid(lmmod)^2) - aic <- n * log(rss / n) + 2 * k - return(list(crit = aic, coefs = lmmod$coefficients)) +test_that("Test (G)MJMCMC", { + set.seed(123) + x <- matrix(rnorm(300), 100) + y <- rnorm(100, 0, 0.5) + rowSums(x[, 1:2]) + y_shift <- y + 10 + y_sin <- rnorm(100, 0, 0.05) + sin(x[, 1]) * 3 + sin(x[, 2]) * 2 + y_sin_shift <- y_sin + 10 + + validate.model <- function (model, x, y) { + expect_true(all(c(model$marg.probs[1:2] > 0.9, model$marg.probs[3] < 0.9))) + summary <- summary(model, labels = c("a", "b", "c"), tol = -1) + expect_true(all(c(summary$marg.probs[1:2] > 0.9, summary$marg.probs[3] < 0.9))) + plot(model) + pred <- predict(model, x) + # Handle paralell runs + if (!is.null(pred$aggr)) { + pred <- pred$aggr + } + rmse <- sqrt(mean((pred$mean - y)^2)) + expect_true(rmse < 0.6) + best_model <- get.best.model(model) + mpm_model <- get.mpm.model(model, y, x) + } + + validate.gmodel <- function (model, x, y) { + summary <- summary(model, labels = c("a", "b", "c"), tol = -1) + expect_true(all(c(summary$marg.probs[1:2] > 0.9, summary$marg.probs[-(1:2)] < 0.9))) + expect_true(all(summary$feats.strings[1:2] %in% c("sin(a)", "sin(b)"))) + plot(model) + pred <- predict(model, x) + # Handle paralell runs + if (!is.null(pred$aggr)) { + pred <- pred$aggr + } + rmse <- sqrt(mean((pred$mean - y)^2)) + expect_true(rmse < 0.2) + best_model <- get.best.model(model) + mpm_model <- get.mpm.model(model, y, x) } - data <- matrix(rnorm(600), 100) - resm <- mjmcmc(data, loglik.tester) - summary(resm, labels = c("a", "b", "c", "d", "e")) - plot(resm) - predm <- predict(resm, data[, -1, drop = FALSE]) - - resg <- gmjmcmc(data, loglik.tester, NULL, c("p0", "exp_dbl")) - summary(resg) - plot(resg) - prediction <- predict(resg, data[, -1, drop = FALSE]) - - respm <- mjmcmc.parallel(2, 2, data, loglik.tester) - summary(respm) - plot(respm) - pred_pm <- predict(respm, data[, -1, drop = FALSE]) - - respg <- gmjmcmc.parallel(2, 2, NULL, data, loglik.tester, NULL, c("p0", "exp_dbl")) - summary(respg) - plot(respg) - pred_pg <- predict(respg, data[, -1, drop = FALSE]) - - fbms_result <- fbms( - X1 ~ ., - family = "gaussian", - method = "gmjmcmc.parallel", - data = data.frame(matrix(rnorm(600), 100)), - transforms = c("sin","cos"), - P = 10, - runs = 1, - cores = 1 - ) - - # Dummy expect to run the test - expect_true(is.list(fbms_result)) + params <- gen.params.gmjmcmc(ncol(x)) + probs <- gen.probs.gmjmcmc("sin") + probs$gen <- c(0, 1, 0, 0) + params$feat$D <- 1 + params$feat$L <- 2 + mlpost_params = list(family = "gaussian", beta_prior = list(type = "Jeffreys-BIC")) + + # No intercept + mod1 <- mjmcmc(x, y, gaussian.loglik, mlpost_params, intercept = FALSE) + mod1p <- mjmcmc.parallel(2, 2, x, y, gaussian.loglik, mlpost_params, intercept = FALSE) + validate.model(mod1, x, y) + validate.model(mod1p, x, y) + + gmod1 <- gmjmcmc(x, y_sin, gaussian.loglik, mlpost_params = mlpost_params, transforms = "sin", params = params, probs = probs, P = 20, intercept = FALSE, verbose = FALSE) + gmod1p <- gmjmcmc.parallel(2, 2, x = x, y = y_sin, loglik.pi = gaussian.loglik, mlpost_params = mlpost_params, transforms = "sin", params = params, probs = probs, intercept = FALSE, verbose = FALSE) + validate.gmodel(gmod1, x, y_sin) + validate.gmodel(gmod1p, x, y_sin) + + # Model defined intercept + mod2 <- mjmcmc(x, y_shift, gaussian.loglik, mlpost_params, intercept = TRUE) + validate.model(mod2, x, y_shift) + + gmod2 <- gmjmcmc(x, y_sin_shift, gaussian.loglik, mlpost_params = mlpost_params, transforms = "sin", params = params, probs = probs, intercept = TRUE, P = 20, verbose = FALSE) + gmod2p <- gmjmcmc.parallel(2, 2, x = x, y = y_sin_shift, loglik.pi = gaussian.loglik, mlpost_params = mlpost_params, transforms = "sin", params = params, probs = probs, intercept = TRUE, verbose = FALSE) + validate.gmodel(gmod2, x, y_sin_shift) + validate.gmodel(gmod2p, x, y_sin_shift) + + # User defined intercept + mod3 <- mjmcmc(cbind(1, x), y_shift, gaussian.loglik, mlpost_params, fixed = 1) + validate.model(mod3, cbind(1, x), y_shift) + + gmod3 <- gmjmcmc(cbind(1, x), y_sin_shift, gaussian.loglik, mlpost_params = mlpost_params, transforms = "sin", params = params, probs = probs, fixed = 1, intercept = FALSE, P = 20, verbose = FALSE) + gmod3p <- gmjmcmc.parallel(2, 2, x = cbind(1, x), y = y_sin_shift, loglik.pi = gaussian.loglik, mlpost_params = mlpost_params, transforms = "sin", params = params, probs = probs, fixed = 1, intercept = FALSE, verbose = FALSE) + validate.gmodel(gmod3, cbind(1, x), y_sin_shift) + validate.gmodel(gmod3p, cbind(1, x), y_sin_shift) }) diff --git a/tests/testthat/test_priors.R b/tests/testthat/test_priors.R new file mode 100644 index 0000000000000000000000000000000000000000..9836c775fb851c72f13ea3089caa27cd011447e7 --- /dev/null +++ b/tests/testthat/test_priors.R @@ -0,0 +1,85 @@ +# Title : General tests +# Objective : Test the code +# Created by: jonlachmann +# Created on: 2021-02-25 + +context("Priors") + +test_that("Test various priors through the fbms function", { + set.seed(123) + x <- matrix(rnorm(300), 100) + y <- rnorm(100, 0, 0.5) + rowSums(x[, 1:2]) + + validate.model <- function (model, x, y) { + expect_true(all(c(model$marg.probs[1:2] > 0.9, model$marg.probs[3] < 0.9))) + summary <- summary(model, labels = c("a", "b", "c"), tol = -1) + expect_true(all(c(summary$marg.probs[1:2] > 0.9, summary$marg.probs[3] < 0.9))) + plot(model) + pred <- predict(model, x) + # Handle paralell runs + if (!is.null(pred$aggr)) { + pred <- pred$aggr + } + rmse <- sqrt(mean((pred$mean - y)^2)) + expect_true(rmse < 0.6) + best_model <- get.best.model(model) + mpm_model <- get.mpm.model(model, y, x) + } + + # No intercept + data <- as.data.frame(cbind(y, x)) + colnames(data) <- c("y", "a", "b", "c") + family <- "gaussian" + beta_prior <- list(type = "g-prior", g = 5, a = 3, b = 1, s = 1, rho = 0, v = 1, k = 1) + + gaussian_priors <- c( + "CH", "tCCH", "TG", "beta.prime", "intrinsic", "ZS-adapted", "uniform","Jeffreys", "benchmark", "robust", + "g-prior", "hyper-g", "EB-local", "ZS-null", "ZS-full", "BIC", "hyper-g-laplace", "AIC", "EB-global", "hyper-g-n", "JZS", + "Jeffreys-BIC", "g-prior" + ) + + expected <- list( + CH = list(crit = 1.79116859063856e+28, coefs = c(X1 = 0.971122645911043, X2 = 1.02417531317894, X3 = -0.0301201893345434)), + tCCH = list(crit = 1.79116859063856e+28, coefs = c(X1 = 0.971122645911043, X2 = 1.02417531317894, X3 = -0.0301201893345434)), + TG = list(crit = 3.30010826314369e+26, coefs = c(X1 = 0.971122645911043, X2 = 1.02417531317894, X3 = -0.0301201893345434)), + beta.prime = list(crit = 2.37584282903985e+33, coefs = c(X1 = 0.971122645911043, X2 = 1.02417531317894, X3 = -0.0301201893345434)), + intrinsic = list(crit = 16.013997004248, coefs = c(X1 = 0.971122645911043, X2 = 1.02417531317894, X3 = -0.0301201893345434)), + `ZS-adapted` = list(crit = 3.08085895954179e+52, coefs = c(X1 = 0.971122645911043, X2 = 1.02417531317894, X3 = -0.0301201893345434)), + uniform = list(crit = 2.6188594083781e+29, coefs = c(X1 = 0.971122645911043, X2 = 1.02417531317894, X3 = -0.0301201893345434)), + Jeffreys = list(crit = 1.96893489476632e+31, coefs = c(X1 = 0.971122645911043, X2 = 1.02417531317894, X3 = -0.0301201893345434)), + benchmark = list(crit = 1.88194647439014e+31, coefs = c(X1 = 0.971122645911043, X2 = 1.02417531317894, X3 = -0.0301201893345434)), + robust = list(crit = 15.4590106422538, coefs = c(X1 = 0.971122645911043, X2 = 1.02417531317894, X3 = -0.0301201893345434)), + `g-prior` = list(crit = 61.0283622188635, coefs = c(X1 = 0.971122645911043, X2 = 1.02417531317894, X3 = -0.0301201893345434)), + `hyper-g` = list(crit = 88.8999586451438, coefs = c(0.971122645911043, 1.02417531317894, -0.0301201893345434)), + `EB-local` = list(crit = 91.5555793778985, coefs = c(0.971122645911043, 1.02417531317894, -0.0301201893345434)), + `ZS-null` = list(crit = 90.2037895063674, coefs = c(0.971122645911043, 1.02417531317894, -0.0301201893345434)), + `ZS-full` = list(crit = 0, coefs = c(0.971122645911043, 1.02417531317894, -0.0301201893345434)), + BIC = list(crit = -170.850296079658, coefs = c(0.971122645911043, 1.02417531317894, -0.0301201893345434)), + `hyper-g-laplace` = list(crit = 88.8449712472898, coefs = c(0.971122645911043, 1.02417531317894, -0.0301201893345434)), + AIC = list(crit = -166.942540800676, coefs = c(0.971122645911043, 1.02417531317894, -0.0301201893345434)), + `EB-global` = list(crit = 91.5555793778985, coefs = c(0.971122645911043, 1.02417531317894, -0.0301201893345434)), + `hyper-g-n` = list(crit = 90.6511779673767, coefs = c(0.971122645911043, 1.02417531317894, -0.0301201893345434)), + JZS = list(crit = 90.9166314790781, coefs = c(0.971122645911043, 1.02417531317894, -0.0301201893345434)), + `Jeffreys-BIC` = list(crit = -83.4856401007207, coefs = c(X1 = 0.971122645911043, X2 = 1.02417531317894, X3 = -0.0301201893345434)) + ) + + for (prior in gaussian_priors) { + if (prior == "TG") next + beta_prior$type <- prior + results[[prior]] <- fbms.mlpost.master(data[, 1], x, c(TRUE, TRUE, TRUE), list(), list(beta_prior = beta_prior, family = "gaussian", r = exp(-0.5))) + print(prior) + print(results[[prior]]$crit) + #expect_equal(results[[prior]]$crit, expected[[prior]]$crit) + #mod1 <- fbms(y ~ ., family = family, beta_prior = beta_prior, method = "mjmcmc", data = data, verbose = FALSE) + #if (!(prior %in% c("hyper-g", "EB-local", "uniform", "ZS-null", "ZS-full", "BIC", "hyper-g-laplace", "AIC", "EB-global", "hyper-g-n", "JZS"))) { + # validate.model(mod1, x, y) + #} + } + results2 <- list() + for (prior in gaussian_priors) { + results2[[prior]] <- fbms.mlik.master2(data[, 1], x, c(TRUE, TRUE, TRUE), list(), list(prior_beta = prior, family = "gaussian", r = exp(-0.5), g = 5, a = 3, b = 1, s = 1, rho = 0, v = 1, k = 1)) + print(prior) + print(results[[prior]]$crit) + } +}) + diff --git a/tests_current/Ex10_Sec6_2.R b/tests_current/Ex10_Sec6_2.R index 53e83a863f6e3fac6d2bdda838b8fafe8584dec6..428a895996949ae606d7a098b62f00d2135a7a10 100644 --- a/tests_current/Ex10_Sec6_2.R +++ b/tests_current/Ex10_Sec6_2.R @@ -40,13 +40,13 @@ transforms <- c("p0","p2","p3","p05","pm05","pm1","pm2","p0p0","p0p05","p0p1","p probs <- gen.probs.gmjmcmc(transforms) probs$gen <- c(1,1,0,1) # Modifications and interactions! -params <- gen.params.gmjmcmc(df) +params <- gen.params.gmjmcmc(ncol(df) - 1) params$feat$D <- 1 # Set depth of features to 1 (still allows for interactions) -params$loglik$r = 1/dim(df)[1] +params$mlpost$r = 1/dim(df)[1] params$feat$pop.max = 10 #specify indices for a random effect -params$loglik$dr = droplevels(Zambia$dr) # district ids for repeated measurements +params$mlpost$dr = droplevels(Zambia$dr) # district ids for repeated measurements #estimator function with lme4 @@ -87,7 +87,7 @@ mixed.model.loglik.lme4 <- function (y, x, model, complex, params) #estimator function with INLA -params$loglik$INLA.num.threads = 10 # Number of threads used by INLA +params$mlpost$INLA.num.threads = 10 # Number of threads used by INLA #params$feat$keep.min = 0.2 @@ -135,7 +135,7 @@ mixed.model.loglik.inla <- function (y, x, model, complex, params) #estimator function with RTMB -params$loglik$nr_dr = sum((table(Zambia$dr))>0) #number of districts (that is number of different random intercepts) +params$mlpost$nr_dr = sum((table(Zambia$dr))>0) #number of districts (that is number of different random intercepts) mixed.model.loglik.rtmb <- function (y, x, model, complex, params) { @@ -203,7 +203,7 @@ if (use.fbms) { transforms = transforms, N.init = 30, probs = probs, params = params, P=3) } else { - result1a <- gmjmcmc(data = df, loglik.pi = mixed.model.loglik.lme4, + result1a <- gmjmcmc(x = df[, -1], y = df[, 1], loglik.pi = mixed.model.loglik.lme4, transforms = transforms, N.init = 30, probs = probs, params = params, P = 3) } @@ -219,7 +219,7 @@ if (use.fbms) { transforms = transforms, N.init = 30, probs = probs, params = params, P=3) } else { - result1b <- gmjmcmc(data = df, loglik.pi = mixed.model.loglik.inla, + result1b <- gmjmcmc(x = df[, -1], y = df[, 1], , loglik.pi = mixed.model.loglik.inla, transforms = transforms, N.init = 30, probs = probs, params = params, P = 3) } @@ -231,7 +231,7 @@ if (use.fbms) { transforms = transforms, N.init = 30, probs = probs, params = params, P=3) } else { - result1c <- gmjmcmc(data = df, loglik.pi = mixed.model.loglik.rtmb, + result1c <- gmjmcmc(x = df[, -1], y = df[, 1], , loglik.pi = mixed.model.loglik.rtmb, transforms = transforms, N.init = 30, probs = probs, params = params, P = 3) } @@ -249,13 +249,13 @@ c(time.lme4$callback_msg, time.inla$callback_msg, time.rtmb$callback_msg) set.seed(20062024) params$feat$pop.max = 10 -result2a <- gmjmcmc.parallel(runs = 40, cores = 10, data = df, loglik.pi = mixed.model.loglik.lme4, transforms = transforms, N.init=100, probs = probs, params = params, P = 25) +result2a <- gmjmcmc.parallel(runs = 40, cores = 10, x = df[, -1], y = df[, 1], loglik.pi = mixed.model.loglik.lme4, transforms = transforms, N.init=100, probs = probs, params = params, P = 25) summary(result2a,tol = 0.05,labels=names(df)[-1]) set.seed(21062024) -result2b <- gmjmcmc.parallel(runs = 120, cores = 40, data = df, loglik.pi = mixed.model.loglik.lme4, transforms = transforms, N.init=100, probs = probs, params = params, P = 25) +result2b <- gmjmcmc.parallel(runs = 120, cores = 40, x = df[, -1], y = df[, 1], loglik.pi = mixed.model.loglik.lme4, transforms = transforms, N.init=100, probs = probs, params = params, P = 25) summary(result2b, labels = names(df)[-1]) @@ -267,7 +267,7 @@ plot(result2b) set.seed(03072024) -result2c <- gmjmcmc.parallel(runs = 200, cores = 40, data = df, loglik.pi = mixed.model.loglik.lme4, transforms = transforms, N.init=100, probs = probs, params = params, P = 25) +result2c <- gmjmcmc.parallel(runs = 200, cores = 40, x = df[, -1], y = df[, 1], loglik.pi = mixed.model.loglik.lme4, transforms = transforms, N.init=100, probs = probs, params = params, P = 25) summary(result2c, labels = names(df)[-1]) summary(result2c, labels = names(df)[-1], pop = "last") @@ -288,8 +288,8 @@ summary(result2c, labels = names(df)[-1]) set.seed(22052024) -params$loglik$INLA.num.threads = 1 # Number of threads used by INLA set to 1 -result2a <- gmjmcmc.parallel(runs = 20, cores = 20, data = df, loglik.pi = mixed.model.loglik.inla, transforms = transforms, N.init=30, probs = probs, params = params, P = 25) +params$mlpost$INLA.num.threads = 1 # Number of threads used by INLA set to 1 +result2a <- gmjmcmc.parallel(runs = 20, cores = 20, x = df[, -1], y = df[, 1], loglik.pi = mixed.model.loglik.inla, transforms = transforms, N.init=30, probs = probs, params = params, P = 25) plot(result2a) summary(result2a, labels = names(df)[-1]) @@ -300,8 +300,8 @@ summary(result2a, labels = names(df)[-1]) params$feat$check.col = F set.seed(20062024) -params$loglik$INLA.num.threads = 1 # Number of threads used by INLA set to 1 -result2b <- gmjmcmc.parallel(runs = 100, cores = 20, data = df, loglik.pi = mixed.model.loglik.inla, transforms = transforms, N.init=16, probs = probs, params = params, P = 15) +params$mlpost$INLA.num.threads = 1 # Number of threads used by INLA set to 1 +result2b <- gmjmcmc.parallel(runs = 100, cores = 20, x = df[, -1], y = df[, 1], loglik.pi = mixed.model.loglik.inla, transforms = transforms, N.init=16, probs = probs, params = params, P = 15) summary(result2b, labels = names(df)[-1]) diff --git a/tests_current/Ex11_Sec6_3.R b/tests_current/Ex11_Sec6_3.R index 113b3953f69c687e347823dc70c1e21f411e8054..ae2337c9754fd9fe3c96e973f0fe077c696d3603 100644 --- a/tests_current/Ex11_Sec6_3.R +++ b/tests_current/Ex11_Sec6_3.R @@ -35,13 +35,13 @@ transforms <- c("p0","p2","p3","p05","pm05","pm1","pm2","p0p0","p0p05","p0p1","p probs <- gen.probs.gmjmcmc(transforms) probs$gen <- c(1,1,0,1) # Only modifications! -params <- gen.params.gmjmcmc(df) +params <- gen.params.gmjmcmc(ncol(df) - 1) params$feat$D <- 2 # Set depth of features to 2 (allow for interactions) -params$loglik$r = 1/dim(df)[1] +params$mlpost$r = 1/dim(df)[1] #specify indices for a random effect -params$loglik$PID = data$Ind # patient ids for repeated measurements -params$loglik$INLA.num.threads = 10 # Number of threads used by INLA +params$mlpost$PID = data$Ind # patient ids for repeated measurements +params$mlpost$INLA.num.threads = 10 # Number of threads used by INLA params$feat$keep.min = 0.2 @@ -101,7 +101,7 @@ if (use.fbms) { result <- fbms(data = df, family = "custom", loglik.pi = poisson.loglik.inla, method = "gmjmcmc", transforms = transforms, probs = probs, params = params, P=3) } else { - result <- gmjmcmc(data = df, loglik.pi = poisson.loglik.inla, transforms = transforms, + result <- gmjmcmc(x = df[, -1], y = df[, 1], , loglik.pi = poisson.loglik.inla, transforms = transforms, probs = probs, params = params, P = 3) } @@ -113,13 +113,13 @@ summary(result) set.seed(23052024) tic() -params$loglik$INLA.num.threads = 1 # Number of threads used by INLA set to 1 +params$mlpost$INLA.num.threads = 1 # Number of threads used by INLA set to 1 if (use.fbms) { result2 <- fbms(data = df, family = "custom", loglik.pi = poisson.loglik.inla, method = "gmjmcmc.parallel", runs = 40, cores = 40, transforms = transforms, probs = probs, params = params, P=25) } else { - result2 <- gmjmcmc.parallel(runs = 40, cores = 40, data = df, loglik.pi = poisson.loglik.inla, + result2 <- gmjmcmc.parallel(runs = 40, cores = 40, x = df[, -1], y = df[, 1], , loglik.pi = poisson.loglik.inla, transforms = transforms, probs = probs, params = params, P = 25) } time.inla = toc() diff --git a/tests_current/Ex12_Sec6_4.R b/tests_current/Ex12_Sec6_4.R index 884f1d3a6236e8d7aa91ed48833a2cbbfa5bc938..298ccebb5bcbcea04bead9f0a68882d7d6123905 100644 --- a/tests_current/Ex12_Sec6_4.R +++ b/tests_current/Ex12_Sec6_4.R @@ -36,9 +36,9 @@ n = dim(df)[1] p = dim(df)[2] - 1 -params <- gen.params.gmjmcmc(data = df) -params$loglik$r = 0.5 -params$loglik$subs = 0.01 +params <- gen.params.gmjmcmc(ncol(df) - 1) +params$mlpost$r = 0.5 +params$mlpost$subs = 0.01 transforms <- c("sigmoid","pm1","p0","p05","p2","p3") @@ -82,14 +82,14 @@ logistic.posterior.bic.irlssgd <- function (y, x, model, complex, params) set.seed(100001) tic() # subsampling analysis -tmp1 <- gmjmcmc(df, logistic.posterior.bic.irlssgd, transforms = transforms, +tmp1 <- gmjmcmc(x = df[, -1], y = df[, 1], logistic.posterior.bic.irlssgd, transforms = transforms, params = params, P = 2, sub = T) time1 = toc() set.seed(100002) tic() # regular analysis -tmp2 <- gmjmcmc(df, logistic.loglik, transforms = transforms, +tmp2 <- gmjmcmc(x = df[, -1], y = df[, 1], logistic.loglik, transforms = transforms, params = params, P = 2) time2 = toc() @@ -107,7 +107,7 @@ c(time1, time2) set.seed(100003) tic() -result <- gmjmcmc.parallel(runs = 10,cores = 10, data = df, +result <- gmjmcmc.parallel(runs = 10,cores = 10, x = df[, -1], y = df[, 1], loglik.pi = logistic.posterior.bic.irlssgd, transforms = transforms, params = params, P = 3, sub = T) time3 = toc() @@ -120,7 +120,7 @@ summary(result) set.seed(100004) tic() -result1a <- gmjmcmc.parallel(runs = 10,cores = 10, data = df, +result1a <- gmjmcmc.parallel(runs = 10,cores = 10, x = df[, -1], y = df[, 1], loglik.pi = logistic.loglik, transforms = transforms, params = params, P = 3) time4 = toc() @@ -139,7 +139,7 @@ summary(result1a) set.seed(100005) tic() -result2 <- gmjmcmc.parallel(runs = 40,cores = 40, data = df, +result2 <- gmjmcmc.parallel(runs = 40,cores = 40, x = df[, -1], y = df[, 1], loglik.pi = logistic.posterior.bic.irlssgd, transforms = transforms, params = params, P = 10, sub = T) time5 = toc() @@ -153,7 +153,7 @@ summary(result2) set.seed(100006) tic() -result2a <- gmjmcmc.parallel(runs = 40,cores = 40, data = df, +result2a <- gmjmcmc.parallel(runs = 40,cores = 40, x = df[, -1], y = df[, 1], loglik.pi = logistic.loglik, transforms = transforms, params = params, P = 10) time6 = toc() diff --git a/tests_current/Ex13_Sec6_5.R b/tests_current/Ex13_Sec6_5.R index ac47c5d2df8b4c2d0226bd656a4a2d1ffddb5323..99521e0db5ce80be70f9b718fc51d0045cd14dd1 100644 --- a/tests_current/Ex13_Sec6_5.R +++ b/tests_current/Ex13_Sec6_5.R @@ -43,9 +43,9 @@ df.test <- df[-train,] time <- df.train$time -params <- gen.params.gmjmcmc(data = df.train[,-1]) -params$loglik$r = 0.5 -params$loglik$time = time #the time variable goes into the params structure +params <- gen.params.gmjmcmc(ncol(df.train) - 1) +params$mlpost$r = 0.5 +params$mlpost$time = time #the time variable goes into the params structure params$feat$keep.min = 0.2 @@ -83,7 +83,7 @@ surv.pseudo.loglik = function(y, x, model, complex, params){ #Single chain analysis (just to illustrate that it works) set.seed(5) -result <- gmjmcmc(data = df.train[,-1], loglik.pi = surv.pseudo.loglik, transforms = transforms, params = params, P = 5) +result <- gmjmcmc(x = df.train[, -(1:2)], y = df.train[, 2], loglik.pi = surv.pseudo.loglik, transforms = transforms, params = params, P = 5) summary(result,tol = 0.01,labels = names(df.train)[-c(1:2)],effects = c(0.025,0.5,0.975)) summary(result) @@ -97,7 +97,7 @@ linpreds <- predict(result,df.test[,-(1:2)], link = function(x) x) #Parallel version set.seed(15) probs$gen <- c(1,1,1,1) -result2 <- gmjmcmc.parallel(runs = 80, cores = 40, data = df.train[,-1], +result2 <- gmjmcmc.parallel(runs = 80, cores = 40, x = df.train[, -(1:2)], y = df.train[, 2], loglik.pi = surv.pseudo.loglik, transforms = transforms, probs = probs, params = params, P = 25) summary(result2,tol = 0.01,labels = names(df.train)[-1],effects = c(0.025,0.5,0.975)) @@ -112,7 +112,7 @@ linpreds2 <- predict(result2,df.test[,-(1:2)], link = function(x) x) #Parallel version only linear terms set.seed(25) probs$gen <- c(0,0,0,1) -result3 <- gmjmcmc.parallel(runs = 80, cores = 40, data = df.train[,-1], +result3 <- gmjmcmc.parallel(runs = 80, cores = 40, x = df.train[, -(1:2)], y = df.train[, 2], loglik.pi = surv.pseudo.loglik, transforms = transforms, probs = probs, params = params, P = 25) @@ -126,7 +126,7 @@ linpreds3 <- predict(result3,df.test[,-(1:2)], link = function(x) x) #Parallel version only fractional polynomials set.seed(35) probs$gen <- c(0,1,0,1) -result4 <- gmjmcmc.parallel(runs = 80, cores = 40, data = df.train[,-1], +result4 <- gmjmcmc.parallel(runs = 80, cores = 40, x = df.train[, -(1:2)], y = df.train[, 2], loglik.pi = surv.pseudo.loglik, transforms = transforms, probs = probs, params = params, P = 25) diff --git a/tests_current/Ex1_Sec3.R b/tests_current/Ex1_Sec3.R index ad7a9992e02b57b615a767fd151307305b33e068..39f3d6b23fc5a914395159d33e97f62ddafbee4c 100644 --- a/tests_current/Ex1_Sec3.R +++ b/tests_current/Ex1_Sec3.R @@ -15,13 +15,13 @@ library(FBMS) data <- read.csv("https://raw.githubusercontent.com/OpenExoplanetCatalogue/oec_tables/master/comma_separated/open_exoplanet_catalogue.txt") -data <- na.omit(data[,c("semimajoraxis","mass","radius","period","eccentricity","hoststar_mass","hoststar_radius","hoststar_metallicity","hoststar_temperature","binaryflag")]) +data <- na.omit(data[, c("semimajoraxis", "mass", "radius", "period", "eccentricity", "hoststar_mass", "hoststar_radius", "hoststar_metallicity", "hoststar_temperature", "binaryflag")]) summary(data) te.ind <- 540:939 -df.train = data[-te.ind,] -df.test = data[te.ind,] +df.train = data[-te.ind, ] +df.test = data[te.ind, ] to3 <- function(x) x^3 transforms <- c("sigmoid","sin_deg","exp_dbl","p0","troot","to3") @@ -36,15 +36,15 @@ use.fbms = FALSE # #################################################### -params <- gen.params.gmjmcmc(df.train) -params$loglik$var <- "unknown" +params <- gen.params.gmjmcmc(ncol(df.train) - 1) +params$mlpost$var <- 1 if (use.fbms) { result.default <- fbms(formula = semimajoraxis ~ 1 + . , data = df.train, method = "gmjmcmc", transforms = transforms, params = params) } else { - result.default <- gmjmcmc(df.train, transforms = transforms, params = params) + result.default <- gmjmcmc(df.train[, -1], df.train[, 1], intercept = TRUE, transforms = transforms, params = params) } -summary(result.default,labels = F) +summary(result.default, labels = FALSE) preds <- predict(result.default, df.test[,-1], link = function(x) x) @@ -52,11 +52,11 @@ sqrt(mean((preds$aggr$mean - df.test$semimajoraxis)^2)) #new additional ways to predict using MPM and best model get.best.model(result = result.default) -preds <- predict(get.best.model(result.default), df.test[,-1]) +preds <- predict(get.best.model(result.default), df.test[, -1]) sqrt(mean((preds - df.test$semimajoraxis)^2)) -get.mpm.model(result = result.default,y = df.test$semimajoraxis,x=df.test[,-1]) -preds <- predict(get.mpm.model(result.default,y = df.test$semimajoraxis,x=df.test[,-1]), df.test[,-1]) +get.mpm.model(result = result.default, y = df.test$semimajoraxis, x = df.test[, -1]) +preds <- predict(get.mpm.model(result.default, y = df.test$semimajoraxis, x = df.test[, -1]), df.test[, -1]) sqrt(mean((preds - df.test$semimajoraxis)^2)) #################################################### @@ -70,10 +70,10 @@ set.seed(123) if (use.fbms) { result.P50 <- fbms(data = df.train, method = "gmjmcmc", transforms = transforms, - P=50, N.init=1000, N.final=1000, params = params) + P = 50, N.init = 1000, N.final = 1000, params = params) } else { - result.P50 <- gmjmcmc(df.train, transforms = transforms, - P=50, N.init=1000, N.final=1000, params = params) + result.P50 <- gmjmcmc(df.train[, -1], df.train[, 1], intercept = TRUE, transforms = transforms, + P = 50, N.init = 1000, N.final = 1000, params = params) } summary(result.P50, labels = names(df.train)[-1]) @@ -86,23 +86,23 @@ summary(result.P50, labels = names(df.train)[-1]) set.seed(123) if (use.fbms) { result_parallel <- fbms(data = df.train, method = "gmjmcmc.parallel", transforms = transforms, - runs = 40, cores = 10, P=25,params = params) + runs = 40, cores = 10, P = 25,params = params) } else { - result_parallel <- gmjmcmc.parallel(runs = 40, cores = 10, data = df.train, loglik.pi = gaussian.loglik, - transforms = transforms, P=25,params = params) + result_parallel <- gmjmcmc.parallel(runs = 40, cores = 10, x = df.train[, -1], y = df.train[, 1], intercept = TRUE, loglik.pi = gaussian.loglik, + transforms = transforms, P = 25, params = params) } summary(result_parallel, tol = 0.01) ####### fixed variance -params$loglik$var <- 1 +params$mlpost$var <- 1 set.seed(124) if (use.fbms) { result_parallel_unitphi <- fbms(data = df.train, method = "gmjmcmc.parallel", transforms = transforms, runs = 40, cores = 10, P=25,params = params) } else { - result_parallel_unitphi <- gmjmcmc.parallel(runs = 40, cores = 10, data = df.train, loglik.pi = gaussian.loglik, - transforms = transforms, P=25,params = params) + result_parallel_unitphi <- gmjmcmc.parallel(runs = 40, cores = 10, x = df.train[, -1], y = df.train[, 1], loglik.pi = gaussian.loglik, + transforms = transforms, P = 25, params = params) } summary(result_parallel_unitphi, tol = 0.01) @@ -133,15 +133,15 @@ gaussian.loglik.g <- function (y, x, model, complex, params) #default for N.final = N.init -params$loglik$betaprior <- "hyper-g-n" -params$loglik$r <- 1/dim(df.train)[1] -params$loglik$alpha <- dim(df.train)[1] +params$mlpost$betaprior <- "hyper-g-n" +params$mlpost$r <- 1/dim(df.train)[1] +params$mlpost$alpha <- dim(df.train)[1] set.seed(1) if (use.fbms) { - result_parallel_g <- fbms(data = df.train,family = "custom", method = "gmjmcmc.parallel",loglik.pi = lm.logpost.bas, transforms = transforms, + result_parallel_g <- fbms(data = df.train, family = "custom", method = "gmjmcmc.parallel",loglik.pi = lm.logpost.bas, transforms = transforms, runs = 40, cores = 10, P=25,params = params) } else { - result_parallel_g <- gmjmcmc.parallel(runs = 40, cores = 10, data = df.train, loglik.pi = lm.logpost.bas, + result_parallel_g <- gmjmcmc.parallel(runs = 40, cores = 10, x = df.train[, -1], y = df.train[, 1], loglik.pi = lm.logpost.bas, transforms = transforms, P=25,params = params) } summary(result_parallel_g, tol = 0.01) @@ -282,7 +282,7 @@ for(prior in c("g-prior", "JZS")) { print(paste0("testing ",prior)) - params$loglik <- list(r = 1/dim(df.train)[1], betaprior = prior,alpha = max(dim(df.train)[1],(dim(df.train)[2])^2)) + params$mlpost <- list(r = 1/dim(df.train)[1], betaprior = prior,alpha = max(dim(df.train)[1],(dim(df.train)[2])^2)) #ours are stil a bit faster than the BAS ones, but BAS are relatively fine too @@ -298,8 +298,8 @@ for(prior in c("g-prior", #default for N.final = N.init -params <- gen.params.gmjmcmc(df.train) -params$loglik$g <- dim(df.train)[1] +params <- gen.params.gmjmcmc(ncol(df.train) - 1) +params$mlpost$g <- dim(df.train)[1] tic() result.default <- fbms(formula = semimajoraxis ~ 1 + . , data = df.train, method = "gmjmcmc.parallel",cores = 10, runs = 10, transforms = transforms, loglik.pi = gaussian.loglik.g, params = params, P = 50) time.res = toc() diff --git a/tests_current/Ex2_Sec4_1.R b/tests_current/Ex2_Sec4_1.R index de32a6585f61aad2fc4c2922a7de9166edc2a6bb..5592bdd717893bee1b30c9bbeb74dc8972b74050 100644 --- a/tests_current/Ex2_Sec4_1.R +++ b/tests_current/Ex2_Sec4_1.R @@ -10,7 +10,7 @@ # Logical to decide whether to perform analysis with fbms function # If FALSE then gmjmcmc or gmjmcmc.parallel function is used -use.fbms <- FALSE +use.fbms <- FALSE stronger.singal <- FALSE library(mvtnorm) @@ -42,6 +42,7 @@ X<-scale(X)/sqrt(n) df <- as.data.frame(cbind(y, X)) +colnames(df) <- c("Y", paste0("X", seq_len(ncol(df) - 1))) correct.model beta.k @@ -52,10 +53,9 @@ transforms <- c("sigmoid","sin_deg","exp_dbl","p0","troot","to3") set.seed(123) if (use.fbms) { - result <- result <- fbms(data = df, method = "gmjmcmc", - transforms = transforms, P = 40) + result <- fbms(data = df, method = "gmjmcmc", transforms = transforms, P = 40) } else { - result <- gmjmcmc(df, gaussian.loglik, gaussian.loglik.alpha, transforms, P = 40) + result <- gmjmcmc(x = df[-1], y = df[,1], mlpost_params = list(family = "gaussian", beta_prior = list(type = "robust")), transforms = transforms, P = 40) } summary(result) @@ -65,7 +65,7 @@ if (use.fbms) { result2 <- result <- fbms(data = df, method = "gmjmcmc", transforms = transforms, N.init = 1000, N.final = 5000, P = 40) } else { - result2 <- gmjmcmc(df, transforms = transforms, + result2 <- gmjmcmc(y = df[,1],x = df[-1], transforms = transforms, mlpost_params = list(family = "gaussian", beta_prior = list(type = "robust")), N.init = 1000, N.final = 5000, P = 40) } summary(result2) @@ -84,7 +84,7 @@ set.seed(123) probs.lin <- gen.probs.mjmcmc() -params.lin <- gen.params.mjmcmc(df) +params.lin <- gen.params.mjmcmc(ncol(df) - 1) params.lin$loglik$r <- 1/dim(df)[1] #to set variance to unknown uncomment below @@ -93,16 +93,16 @@ params.lin$loglik$r <- 1/dim(df)[1] if (use.fbms) { result.lin <- fbms(data = df, N = 5000) } else { - result.lin <- mjmcmc(df, N = 5000, probs = probs.lin, - params = params.lin) - + result.lin <- mjmcmc(y = df[,1], x = df[,-1], mlpost_params = list(family = "gaussian", beta_prior = list(type = "Jeffreys-BIC")), N = 5000, probs = probs.lin, params = params.lin) } plot(result.lin) + +#something is wrong with the names, Jon! summary(result.lin) -correct.model +correct.model beta.k @@ -112,7 +112,7 @@ set.seed(123) if (use.fbms) { result.lindef <- fbms(data = df) } else { - result.lindef <- mjmcmc(df) + result.lindef <- mjmcmc(x = df[,-1],y = df[,1]) } diff --git a/tests_current/Ex3_Sec4_1.R b/tests_current/Ex3_Sec4_1.R index 6089c74185225b76d2e0242f074fad8353aeedc0..07edbf0d2eb88391dfb11625acdeb949668bab9b 100644 --- a/tests_current/Ex3_Sec4_1.R +++ b/tests_current/Ex3_Sec4_1.R @@ -32,7 +32,7 @@ probs$gen = c(0,0,0,1) # Candidates for the first MJMCMC round based on correlation with response c.vec = unlist(mclapply(2:ncol(df), function(x)abs(cor(df[,1],df[,x])))) ids = sort(order(c.vec,decreasing=TRUE)[1:50]) -params = gen.params.gmjmcmc(df) +params = gen.params.gmjmcmc(ncol(df) - 1) params$feat$prel.filter <- ids params$feat$check.col <- T @@ -44,7 +44,7 @@ params$feat$pop.max <- 50 # #################################################### n = dim(df)[1]; p=dim(df)[2] -params$loglik$g <- max(n,p^2) # Using recommendation from Fernandez et al (2001) +params$mlpost$g <- max(n,p^2) # Using recommendation from Fernandez et al (2001) #this will be added to the package log_prior <- function(params,complex){ @@ -87,7 +87,7 @@ if(run.parallel) P=50,N.init=1000,N.final=1000,runs=10,cores=10) }else { start = Sys.time() - result_parallel1=gmjmcmc.parallel(data=df,loglik.pi=gaussian.loglik.g,transforms=transforms, + result_parallel1=gmjmcmc.parallel(x = df[, -1], y = df[, 1], loglik.pi=gaussian.loglik.g,transforms=transforms, probs=probs,params=params, P=50,N.init=1000,N.final=1000,runs=10,cores=10) end = Sys.time() @@ -103,7 +103,7 @@ if(run.parallel) method="gmjmcmc.parallel", P=50,N.init=1000,N.final=1000,runs=10,cores=10) } else { - result_parallel2=gmjmcmc.parallel(data=df,loglik.pi=gaussian.loglik.g,transforms=transforms, + result_parallel2=gmjmcmc.parallel(x = df[, -1], y = df[, 1], loglik.pi=gaussian.loglik.g,transforms=transforms, probs=probs,params=params, P=50,N.init=1000,N.final=1000,runs=10,cores=10) } @@ -117,7 +117,7 @@ if(run.parallel) method="gmjmcmc.parallel", P=50,N.init=1000,N.final=1000,runs=10,cores=10) } else { - result_parallel3=gmjmcmc.parallel(data=df,loglik.pi=gaussian.loglik.g,transforms=transforms, + result_parallel3=gmjmcmc.parallel(x = df[, -1], y = df[, 1], loglik.pi=gaussian.loglik.g,transforms=transforms, probs=probs,params=params, P=50,N.init=1000,N.final=1000,runs=10,cores=10) } diff --git a/tests_current/Ex3_Sec4_1_Removed, replaced_by_Ex7.R b/tests_current/Ex3_Sec4_1_Removed, replaced_by_Ex7.R index 35c98f33230eb67f0724d58767be2eb3a3d34a37..7f60d1a46fdcd9ca304bddf3423d6cf88bf6086d 100644 --- a/tests_current/Ex3_Sec4_1_Removed, replaced_by_Ex7.R +++ b/tests_current/Ex3_Sec4_1_Removed, replaced_by_Ex7.R @@ -32,7 +32,7 @@ ids = sort(order(c.vec,decreasing=TRUE)[1:50]) # #################################################### -params = gen.params.gmjmcmc(df) +params = gen.params.gmjmcmc(ncol(df) - 1) params$feat$check.col <- F params$feat$pop.max = 60 params$prel.select <- ids @@ -41,8 +41,8 @@ transforms = c("") probs = gen.probs.gmjmcmc(transforms) probs$gen = c(0,0,0,1) probs$filter=0.8 -params$loglik$var = "unknown" -#params$loglik$r = 1 +params$mlpost$var = "unknown" +#params$mlpost$r = 1 set.seed(123) start.time=Sys.time() @@ -50,7 +50,7 @@ if (use.fbms) { result1 <- fbms(data = df, method = "gmjmcmc", transforms = transforms, probs = probs, params = params, P=25) } else { - result1 = gmjmcmc(data = df, transforms = transforms, + result1 = gmjmcmc(x = df[, -1], y = df[, 1], transforms = transforms, probs = probs, params = params, P=25) } show(Sys.time()-start.time) @@ -64,7 +64,7 @@ if (use.fbms) { result2 <- fbms(data = df, method = "gmjmcmc", transforms = transforms, probs = probs, params = params, P=25) } else { - result2 = gmjmcmc(data = df, transforms = transforms, + result2 = gmjmcmc(x = df[, -1], y = df[, 1], transforms = transforms, probs = probs, params = params, P=25) } @@ -108,11 +108,11 @@ min(cor(X.best)) ids3 = ids transforms = c("") -params = gen.params.gmjmcmc(df[,ids3]) +params = gen.params.gmjmcmc(ncol(df[,ids3]) - 1) params$feat$check.col <- F params$feat$pop.max = 60 params$prel.select <- ids3 -params$loglik$var <- "unknown" +params$mlpost$var <- "unknown" probs = gen.probs.gmjmcmc(transforms) probs$gen = c(0,0,0,1) @@ -123,7 +123,7 @@ if (use.fbms) { result3 <- fbms(data = df, method = "gmjmcmc", transforms = transforms, probs = probs, params = params, P=25) } else { - result3 = gmjmcmc(data = df, transforms = transforms, + result3 = gmjmcmc(x = df[, -1], y = df[, 1], transforms = transforms, probs = probs, params = params, P=25) } @@ -154,7 +154,7 @@ if (use.fbms) { transforms = transforms, probs = probs, params = params, P=25, N.init=500, N.final=500) } else { - result_parallel = gmjmcmc.parallel(runs = 40, cores = 40,data = df, + result_parallel = gmjmcmc.parallel(runs = 40, cores = 40, x = df[, -1], y = df[, 1], transforms = transforms, probs = probs, params = params, P=25, N.init=500, N.final=500) } @@ -187,7 +187,7 @@ if (use.fbms) { transforms = transforms, probs = probs, params = params, P=25, N.init=500, N.final=500) } else { - result_parallel2 = gmjmcmc.parallel(runs = 40, cores = 10,data = df, + result_parallel2 = gmjmcmc.parallel(runs = 40, cores = 10, x = df[, -1], y = df[, 1], transforms = transforms, probs = probs, params = params, P=25, N.init=500, N.final=500) } diff --git a/tests_current/Ex4_Sec4_2.R b/tests_current/Ex4_Sec4_2.R index 9436204a38b4a51ded316860a7b2c678faa916e8..6e0c158612fa385e2684f9b34e591e0bf4f2c729 100644 --- a/tests_current/Ex4_Sec4_2.R +++ b/tests_current/Ex4_Sec4_2.R @@ -35,8 +35,8 @@ df <- as.data.frame(cbind(y, X)) transforms <- c("") -params <- gen.params.gmjmcmc(df) -#params$loglik$var = "unknown" #this will set the variance to unknwon +params <- gen.params.gmjmcmc(ncol(df) - 1) +#params$mlpost$var = "unknown" #this will set the variance to unknwon probs <- gen.probs.gmjmcmc(transforms) probs$gen <- c(1,0,0,1) #Include interactions and mutations @@ -51,7 +51,7 @@ if (use.fbms) { result <- fbms(data = df, method = "gmjmcmc", transforms = transforms, probs = probs, params = params, P=40) } else { - result <- gmjmcmc(df, transforms = transforms, params = params, probs = probs, P=40) + result <- gmjmcmc(x = df[, -1], y = df[, 1], transforms = transforms, params = params, probs = probs, P=40) } summary(result) @@ -61,7 +61,7 @@ if (use.fbms) { result2 <- fbms(data = df, method = "gmjmcmc", transforms = transforms, probs = probs, params = params, P=40) } else { - result2 <- gmjmcmc(df, transforms = transforms, N.init = 1000, N.final = 5000, + result2 <- gmjmcmc(x = df[, -1], y = df[, 1], transforms = transforms, N.init = 1000, N.final = 5000, probs = probs, params = params, P = 40) } summary(result2, tol = 0.01) @@ -81,7 +81,7 @@ if (use.fbms) { runs = 40, cores = 10, probs = probs, params = params, P=25) } else { - result_parallel = gmjmcmc.parallel(runs = 40, cores = 10, data = df, + result_parallel = gmjmcmc.parallel(runs = 40, cores = 10, x = df[, -1], y = df[, 1], transforms = transforms, probs = probs, params = params, P=25) } @@ -97,7 +97,7 @@ if (use.fbms) { runs = 40, cores = 10, N.init=1000, N.final=2000, probs = probs, params = params, P=25) } else { - result_parallel2 = gmjmcmc.parallel(runs = 40, cores = 10, data = df, + result_parallel2 = gmjmcmc.parallel(runs = 40, cores = 10, x = df[, -1], y = df[, 1], transforms = transforms, probs = probs, params = params, P=25, N.init=1000, N.final=2000) } @@ -120,7 +120,7 @@ set.seed(123) if (use.fbms) { result.lin <- fbms(data = df, N = 5000) } else { - result.lin <- mjmcmc(df, gaussian.loglik, N = 5000) + result.lin <- mjmcmc(x = df[, -1], y = dt[, 1], gaussian.loglik, N = 5000) } plot(result.lin) diff --git a/tests_current/Ex5_Sec4_3.R b/tests_current/Ex5_Sec4_3.R index e9bfe5ffb158dafc83af82c578b6261caec491d2..c085ef89a18a331835b71afb4eaf02172f9c0ddb 100644 --- a/tests_current/Ex5_Sec4_3.R +++ b/tests_current/Ex5_Sec4_3.R @@ -40,11 +40,11 @@ df$y = rnorm(n =n, mean = mu,sd = 1) transforms <- c("p0","p2","p3","p05","pm05","pm1","pm2","p0p0","p0p05","p0p1","p0p2","p0p3","p0p05","p0pm05","p0pm1","p0pm2") probs <- gen.probs.gmjmcmc(transforms) probs$gen <- c(0,1,0,1) # Only modifications! -params <- gen.params.gmjmcmc(df) +params <- gen.params.gmjmcmc(ncol(df) - 1) params$feat$D <- 1 # Set depth of features to 1 #to set variance to unknown uncomment below -#params$loglik$var <- "unknown" +#params$mlpost$var <- "unknown" #################################################### # @@ -58,7 +58,7 @@ if (use.fbms) { result <- fbms(data = df, method = "gmjmcmc", transforms = transforms, probs = probs, params = params) } else { - result <- gmjmcmc(df, transforms = transforms, probs = probs, params = params) + result <- gmjmcmc(x = df[, -1], y = df[, 1], transforms = transforms, probs = probs, params = params) } summary(result, labels = names(df[-1])) #plot.diagn(result,FUN = median) @@ -77,7 +77,7 @@ if (use.fbms) { result_parallel <- fbms(data = df, method = "gmjmcmc.parallel", transforms = transforms, probs = probs, params = params, P=25) } else { - result_parallel = gmjmcmc.parallel(runs = 40, cores = 10, data = df, + result_parallel = gmjmcmc.parallel(runs = 40, cores = 10, x = df[, -1], y = df[, 1], transforms = transforms, probs = probs, params = params, P=25) } #summary(result_parallel, labels = names(df[-1])) @@ -94,7 +94,7 @@ if (use.fbms) { result_parallel2 <- fbms(runs = 40, cores = 10,data = df, method = "gmjmcmc.parallel", transforms = transforms, probs = probs, params = params, P=25, N.init=1000, N.final=2000) } else { - result_parallel2 = gmjmcmc.parallel(runs = 40, cores = 10,data = df, + result_parallel2 = gmjmcmc.parallel(runs = 40, cores = 10, x = df[, -1], y = df[, 1], transforms = transforms, probs = probs, params = params, P=25, N.init=1000, N.final=2000) } @@ -111,7 +111,7 @@ if (use.fbms) { result_parallel3 <- fbms(data = df, method = "gmjmcmc.parallel", transforms = transforms, probs = probs, params = params, P=50, N.init=2000, N.final=4000) } else { - result_parallel3 = gmjmcmc.parallel(runs = 40, cores = 40, data = df, transforms = transforms, + result_parallel3 = gmjmcmc.parallel(runs = 40, cores = 40, x = df[, -1], y = df[, 1], transforms = transforms, probs = probs, params = params, P=50, N.init=2000, N.final=4000) } #summary(result_parallel3, labels = names(df[-1])) diff --git a/tests_current/Ex6_Sec4_4.R b/tests_current/Ex6_Sec4_4.R index 76071ca3ff5b941a09d0e82c3cdf2cd088882937..81e8b45e9944752406959b25dc8c7333029338a4 100644 --- a/tests_current/Ex6_Sec4_4.R +++ b/tests_current/Ex6_Sec4_4.R @@ -65,9 +65,9 @@ transforms <- c("sigmoid") probs <- gen.probs.gmjmcmc(transforms) probs$gen <- c(0,0,1,1) #Only projections! -params <- gen.params.gmjmcmc(df.training) -#params$loglik$r = 0.9 -#params$loglik$var = "unknown" +params <- gen.params.gmjmcmc(ncol(df.training) - 1) +#params$mlpost$r = 0.9 +#params$mlpost$var = "unknown" ############################################################################# @@ -83,7 +83,7 @@ if (use.fbms) { result <- fbms(data = df.training, method = "gmjmcmc", transforms = transforms, probs = probs, params = params) } else { - result <- gmjmcmc(df.training, transforms = transforms, probs = probs,params = params) + result <- gmjmcmc(x = df.training[, -1], y = df.training[, 1], transforms = transforms, probs = probs,params = params) } summary(result) @@ -110,7 +110,7 @@ if (use.fbms) { result_parallel <- fbms(data = df.training, method = "gmjmcmc.parallel", runs = 4, cores = 4, transforms = transforms, probs = probs, params = params, P=25) } else { - result_parallel = gmjmcmc.parallel(runs = 4, cores = 4, data = df.training, + result_parallel = gmjmcmc.parallel(runs = 4, cores = 4, x = df.training[, -1], y = df.training[, 1], loglik.pi =gaussian.loglik,loglik.alpha = gaussian.loglik.alpha, transforms = transforms, probs = probs, params = params, P=25) } @@ -143,7 +143,7 @@ if (use.fbms) { result.a3 <- fbms(data = df.training, method = "gmjmcmc", transforms = transforms, probs = probs, params = params) } else { - result.a3 <- gmjmcmc(df.training, transforms = transforms, probs = probs, params = params) + result.a3 <- gmjmcmc(x = df.training[, -1], y = df.training[, 1], transforms = transforms, probs = probs, params = params) } summary(result.a3) @@ -173,7 +173,7 @@ if (use.fbms) { result_parallel.a3 <- fbms(data = df.training, method = "gmjmcmc.parallel", runs = 40, cores = 40, transforms = transforms, probs = probs, params = params, P=25) } else { - result_parallel.a3 = gmjmcmc.parallel(runs = 40, cores = 40, data = df.training, + result_parallel.a3 = gmjmcmc.parallel(runs = 40, cores = 40, x = df.training[, -1], y = df.training[, 1], loglik.pi =gaussian.loglik,loglik.alpha = gaussian.loglik.alpha, transforms = transforms, probs = probs, params = params, P=25) } @@ -212,7 +212,7 @@ if (use.fbms) { result.fp <- fbms(data = df.training, method = "gmjmcmc.parallel", runs = 40, cores = 40, transforms = transforms, probs = probs, params = params, P=25) } else { - result.fp <- gmjmcmc.parallel(runs = 40, cores = 40, data = df.training, + result.fp <- gmjmcmc.parallel(runs = 40, cores = 40, x = df.training[, -1], y = df.training[, 1], loglik.pi =gaussian.loglik,loglik.alpha = gaussian.loglik.alpha, transforms = transforms, probs = probs, params = params, P=25) } diff --git a/tests_current/Ex8_Sec5.2_logic_regression.R b/tests_current/Ex8_Sec5.2_logic_regression.R index 95bb2ce3bfc7072b05e759b3196e38d124d8d10c..cf79e97d6b551c454bcb0bea4464eebf0a667508 100644 --- a/tests_current/Ex8_Sec5.2_logic_regression.R +++ b/tests_current/Ex8_Sec5.2_logic_regression.R @@ -44,8 +44,8 @@ probs <- gen.probs.gmjmcmc(transforms) probs$gen <- c(1,1,0,1) #No projections allowed -params <- gen.params.gmjmcmc(df.training) -params$loglik$p <- 50 #number of leaves +params <- gen.params.gmjmcmc(ncol(df.training) - 1) +params$mlpost$p <- 50 #number of leaves params$feat$pop.max <- 31 params$feat$L <- 15 ############################################## @@ -98,12 +98,12 @@ if (use.fbms) { } else { # result <- gmjmcmc(df.training, transforms = transforms, probs = probs) - result <- gmjmcmc(df.training, loglik.pi = estimate.logic.lm,N.init = 500,N.final = 500, , P = 25, + result <- gmjmcmc(x = df.training[, -1], y = df.training[, 1], loglik.pi = estimate.logic.lm,N.init = 500,N.final = 500, , P = 25, transforms = transforms,params = params, probs = probs) } summary(result) -mpm <- get.mpm.model(result,y = df.training$Y2,x = df.training[,-1],family = "custom", loglik.pi = estimate.logic.lm,params = params$loglik) +mpm <- get.mpm.model(result, y = df.training$Y2, x = df.training[,-1], family = "custom", loglik.pi = estimate.logic.lm,params = params$mlpost) mbest <- get.best.model(result) @@ -143,16 +143,16 @@ points(pred_mpm,df.test$Mean,col = 4) set.seed(5002) if (use.fbms) { - result_parallel <- fbms(data = df.training, family = "custom", loglik.pi = estimate.logic.lm,N.init = 500,N.final = 500, + result_parallel <- fbms(data = df.training, family = "custom", loglik.pi = estimate.logic.lm, N.init = 500, N.final = 500, method = "gmjmcmc.parallel", runs = 16, cores = 8, transforms = transforms, probs = probs, params = params, P=25) } else { - result_parallel = gmjmcmc.parallel(runs = 16, cores = 8, data = df.training, + result_parallel = gmjmcmc.parallel(runs = 16, cores = 8, x = df.training[, -1], y = df.training[, 1], loglik.pi = estimate.logic.lm,N.init = 500,N.final = 500, transforms = transforms, probs = probs, params = params, P=25) } summary(result_parallel) -mpm <- get.mpm.model(result_parallel,y = df.training$Y2,x = df.training[,-1],family = "custom", loglik.pi = estimate.logic.lm,params = params$loglik) +mpm <- get.mpm.model(result_parallel, y = df.training$Y2, x = df.training[,-1], family = "custom", loglik.pi = estimate.logic.lm,params = params$mlpost) mbest <- get.best.model(result_parallel) @@ -192,14 +192,14 @@ probs <- gen.probs.gmjmcmc(transforms) probs$gen <- c(1,1,0,1) #No projections allowed probs$filter <- 0.6 -params <- gen.params.gmjmcmc(df.training) -params$loglik$p <- 50 #number of leaves -params$loglik$n <- n #used in specifying parameter v of the tCCH prior -params$loglik$p.a <- 1 -params$loglik$p.b <- 1 -params$loglik$p.r <- 1.5 -params$loglik$p.s <- 0 -params$loglik$p.k <- 1 +params <- gen.params.gmjmcmc(ncol(df.training) - 1) +params$mlpost$p <- 50 #number of leaves +params$mlpost$n <- n #used in specifying parameter v of the tCCH prior +params$mlpost$p.a <- 1 +params$mlpost$p.b <- 1 +params$mlpost$p.r <- 1.5 +params$mlpost$p.s <- 0 +params$mlpost$p.k <- 1 params$feat$pop.max <- 31 library(BAS) #needed for hypergeometric functions @@ -207,7 +207,7 @@ estimate.logic.tcch = function(y, x, model, complex, params) { if (length(params) == 0) - params <- list(r = 1/dim(x)[1]) + params <- list(r = 1 / dim(x)[1]) suppressWarnings({ mod <- fastglm(as.matrix(x[, model]), y, family = gaussian()) }) @@ -248,12 +248,12 @@ if (use.fbms) { } else { # result <- gmjmcmc(df.training, transforms = transforms, probs = probs) - result <- gmjmcmc(df.training, loglik.pi = estimate.logic.tcch,N.init = 500,N.final = 500, P = 25, + result <- gmjmcmc(x = df.training[, -1], y = df.training[, 1], loglik.pi = estimate.logic.tcch,N.init = 500,N.final = 500, P = 25, transforms = transforms,params = params, probs = probs) } summary(result) -mpm <- get.mpm.model(result,y = df.training$Y2,x = df.training[,-1],family = "custom", loglik.pi = estimate.logic.lm,params = params$loglik) +mpm <- get.mpm.model(result, y = df.training$Y2, x = df.training[,-1], family = "custom", loglik.pi = estimate.logic.lm,params = params$mlpost) mbest <- get.best.model(result) @@ -290,12 +290,12 @@ if (use.fbms) { method = "gmjmcmc.parallel", runs = 16, cores = 8, transforms = transforms, probs = probs, params = params, P=25) } else { - result_parallel = gmjmcmc.parallel(runs = 16, cores = 8, data = df.training, + result_parallel = gmjmcmc.parallel(runs = 16, cores = 8, x = df.training[, -1], y = df.training[, 1], loglik.pi = estimate.logic.tcch,N.init = 500,N.final = 500, transforms = transforms, probs = probs, params = params, P=25) } summary(result_parallel) -mpm <- get.mpm.model(result_parallel,y = df.training$Y2,x = df.training[,-1],family = "custom", loglik.pi = estimate.logic.lm,params = params$loglik) +mpm <- get.mpm.model(result_parallel,y = df.training$Y2,x = df.training[,-1],family = "custom", loglik.pi = estimate.logic.lm,params = params$mlpost) mbest <- get.best.model(result_parallel) diff --git a/tests_current/Ex8_Sec5_2.R b/tests_current/Ex8_Sec5_2.R index 9e4c15ad67abb9f49c4e46bcd5b672e57f58cffb..c2a15bae0aa659a2913dfedf0a27adb8f03c1a5d 100644 --- a/tests_current/Ex8_Sec5_2.R +++ b/tests_current/Ex8_Sec5_2.R @@ -66,8 +66,8 @@ transforms <- c("sigmoid") probs <- gen.probs.gmjmcmc(transforms) probs$gen <- c(0,0,1,1) #Only projections! -params <- gen.params.gmjmcmc(df.training) -params$loglik$r = 0.2 +params <- gen.params.gmjmcmc(ncol(df.training) - 1) +params$mlpost$r = 0.2 ############################################## # @@ -116,7 +116,7 @@ if (use.fbms) { } else { # result <- gmjmcmc(df.training, transforms = transforms, probs = probs) - result <- gmjmcmc(df.training, loglik.pi = gaussian.loglik.gap.open.prior, + result <- gmjmcmc(x = df.training[, -1], y = df.training[, 1], loglik.pi = gaussian.loglik.gap.open.prior, transforms = transforms, probs = probs) } @@ -147,7 +147,7 @@ if (use.fbms) { method = "gmjmcmc.parallel", runs = 40, cores = 40, transforms = transforms, probs = probs, params = params, P=25) } else { - result_parallel = gmjmcmc.parallel(runs = 40, cores = 40, data = df.training, + result_parallel = gmjmcmc.parallel(runs = 40, cores = 40, x = df.training[, -1], y = df.training[, 1], loglik.pi = gaussian.loglik.gap.open.prior, loglik.alpha = gaussian.loglik.alpha, transforms = transforms, probs = probs, params = params, P=25) @@ -182,7 +182,7 @@ if (use.fbms) { method = "gmjmcmc", transforms = transforms, probs = probs, params = params) } else { - result.a3 <- gmjmcmc(df.training, transforms = transforms, probs = probs, params = params) + result.a3 <- gmjmcmc(x = df.training[, -1], y = df.training[, 1], transforms = transforms, probs = probs, params = params) } summary(result.a3) @@ -214,7 +214,7 @@ if (use.fbms) { method = "gmjmcmc.parallel", runs = 40, cores = 40, transforms = transforms, probs = probs, params = params, P=25) } else { - result_parallel.a3 = gmjmcmc.parallel(runs = 40, cores = 40, data = df.training, + result_parallel.a3 = gmjmcmc.parallel(runs = 40, cores = 40, x = df.training[, -1], y = df.training[, 1], loglik.pi = gaussian.loglik.gap.open.prior, loglik.alpha = gaussian.loglik.alpha, transforms = transforms, probs = probs, params = params, P=25) @@ -252,7 +252,7 @@ if (use.fbms) { result.fp <- fbms(data = df.training, method = "gmjmcmc.parallel", runs = 40, cores = 40, transforms = transforms, probs = probs, params = params, P=25) } else { - result.fp <- gmjmcmc.parallel(runs = 40, cores = 40, data = df.training, + result.fp <- gmjmcmc.parallel(runs = 40, cores = 40, x = df.training[, -1], y = df.training[, 1], loglik.pi =gaussian.loglik,loglik.alpha = gaussian.loglik.alpha, transforms = transforms, probs = probs, params = params, P=25) } diff --git a/tests_current/Ex9_Sec6_1.R b/tests_current/Ex9_Sec6_1.R index ac0e6f7a29e065545cc139df08c769a61d1ffa70..6a20fc5c90fd732668f15c25eda795b46777f8ec 100644 --- a/tests_current/Ex9_Sec6_1.R +++ b/tests_current/Ex9_Sec6_1.R @@ -38,7 +38,7 @@ transforms <- c("sigmoid","sin_deg","exp_dbl","p0","troot","to3") probs <- gen.probs.gmjmcmc(transforms) probs$gen <- c(1,1,1,1) -params <- gen.params.gmjmcmc(df) +params <- gen.params.gmjmcmc(ncol(df) - 1) params$feat$check.col <- F #################################################### # @@ -55,7 +55,7 @@ set.seed(6001) -params$loglik = list(r = 1/dim(df)[1], family = "binomial", betaprior = g.prior(100), laplace = F) +params$mlpost = list(r = 1/dim(df)[1], family = "binomial", betaprior = g.prior(100), laplace = F) result <- fbms(data = df, method = "gmjmcmc", family = "custom", transforms = transforms, probs = probs, loglik.pi = glm.logpost.bas, params = params,P=3) @@ -79,12 +79,12 @@ if (use.fbms) { result <- fbms(data = df, method = "gmjmcmc", family = "binomial", transforms = transforms, probs = probs, params = params) } else { - result <- gmjmcmc(df, logistic.loglik, transforms + result <- gmjmcmc(x = df[, -1], y = df[, 1], logistic.loglik, transforms = transforms, probs = probs, params = params) } # Default tuning parameters for logistic regression: # -# params$loglik$r = 1/n +# params$mlpost$r = 1/n summary(result) @@ -125,7 +125,7 @@ if (use.fbms) { runs = 40, cores = 40, transforms = transforms, probs = probs, params = params, P=25) } else { - result_parallel = gmjmcmc.parallel(runs = 40, cores = 40, data = df, + result_parallel = gmjmcmc.parallel(runs = 40, cores = 40, x = df[, -1], y = df[, 1], loglik.pi = logistic.loglik, transforms = transforms, probs = probs, params = params, P=25) } diff --git a/tests_current/some tests.R b/tests_current/some tests.R new file mode 100644 index 0000000000000000000000000000000000000000..351318136f9ea1e25910d455c996cb031a983ff3 --- /dev/null +++ b/tests_current/some tests.R @@ -0,0 +1,114 @@ +# Validation script for fbms.mlik.master +# Tests all supported (family, prior) combinations with required parameters + +set.seed(42) # Ensure reproducibility + +# Generate synthetic data +gen_data <- function(family) { + n <- 50 + p <- 3 + x <- cbind(1, matrix(rnorm(n * p), n, p)) # Include intercept + beta <- c(1, -1, 0.5, -0.5) + + if (family == "gaussian") { + y <- x %*% beta + rnorm(n, mean = 0, sd = 1000) + } else if (family == "binomial") { + prob <- 1 / (1 + exp(-x %*% beta + rnorm(n, mean = 0, sd = 1))) + y <- rbinom(n, 1, prob) + } else if (family == "poisson") { + lambda <- exp(x %*% beta + rnorm(n, mean = 0, sd = 1)) + y <- rpois(n, lambda) + } else if (family == "gamma") { + shape <- 2 + rate <- exp(-x %*% beta + rnorm(n, mean = 0, sd = 0.5)) + y <- rgamma(n, shape = shape, rate = rate) + } else { + stop("Unsupported family") + } + + list(y = as.vector(y), x = x) +} + +# Define prior lists +glm_and_gaussian_priors <- c("ZS-adapted", "beta.prime", "EB-local", "g-prior", "hyper-g", "hyper-g-n", + "intrinsic", "Jeffreys", "uniform", "benchmark", "robust", "Jeffreys-BIC", + "CH", "tCCH", "TG") +gaussian_only_priors <- c("ZS-null", "ZS-full", "hyper-g-laplace", "AIC", "JZS","EB-global") + +glm_priors <- glm_and_gaussian_priors +gaussian_priors <- c(glm_and_gaussian_priors, gaussian_only_priors) + +families <- c("gaussian", "binomial", "poisson", "gamma") + +# Required parameters for priors +prior_params <- list( + "g-prior" = list(g = 10,a = 10), + "hyper-g" = list(a = 3), + "hyper-g-n" = list(a = 3), + "ZS-null" = list(a = 3), + "ZS-full" = list(a = 500), + "hyper-g-laplace" = list(a = 3), + "AIC" = list(a = 3), + "JZS" = list(a = 3), + "EB-global" = list(a = 3), + "EB-local" = list(a = 3), + "CH" = list(a = 1, b = 2, s = 1), + "tCCH" = list(a = 1, b = 2, s = 0, rho = 1, v = 1, k = 1), + "TG" = list(a = 2, s = 1) +) + +# Testing loop +for (family in families) { + priors <- if (family == "gaussian") gaussian_priors else glm_priors + data <- gen_data(family) + + cat("\n===== Testing family:", family, "=====") + + for (prior in priors) { + + print(prior) + + params <- list(family = family,beta_prior = list(type = prior)) + + params_old <- list(family = family,prior_beta = prior) + + + # Add required parameters if applicable + + if (prior %in% names(prior_params)) { + params$beta_prior <- c(params$beta_prior, prior_params[[prior]]) + } + + if (prior %in% names(prior_params)) { + params_old <- c(params_old, prior_params[[prior]]) + } + + # Run the model + tryCatch({ + + set.seed(1) + result <- fbms.mlik.master(data$y, data$x, model = c(T, rep(TRUE, ncol(data$x) - 1)), + complex = list(oc = 1), params = params) + set.seed(1) + result.null <- fbms.mlik.master(data$y, data$x, model = c(T, T, rep(FALSE, ncol(data$x) - 2)), + complex = list(oc = 1), params = params) + set.seed(1) + result.old <- fbms.mlik.master_old(data$y, data$x, model = c(T, rep(TRUE, ncol(data$x) - 1)), + complex = list(oc = 1), params = params_old)# + set.seed(1) + result.null.old <- fbms.mlik.master_old(data$y, data$x, model = c(T, T, rep(FALSE, ncol(data$x) - 2)), + complex = list(oc = 1), params = params_old) + + + crit_rounded <- round(result$crit - result.null$crit - result.old$crit + result.null.old$crit, 8) + coefs_mean <- round(mean(result$coefs) - mean(result.null$coefs) - mean(result.old$coefs) + mean(result.null.old$coefs), 8) + + cat(sprintf("\nPrior: %-15s -> crit: %8.4f, mean(coefs): %8.4f", prior, crit_rounded, coefs_mean)) + + print("Finished") + + }, error = function(e) { + cat(sprintf("\nPrior: %-15s -> ERROR: %s", prior, conditionMessage(e))) + }) + } +} diff --git a/vignettes/FBMS-guide.Rmd b/vignettes/FBMS-guide.Rmd index c95c0c247b22ae9c06d3b1290e6ea49746157987..af1a9527c35ec58618d09a37a0fcd15df3664d52 100644 --- a/vignettes/FBMS-guide.Rmd +++ b/vignettes/FBMS-guide.Rmd @@ -7,8 +7,8 @@ vignette: > %\VignetteEncoding{UTF-8} %\VignetteDepends{fastglm, FBMS} --- -The `FBMS` package provides functions for Flexible Bayesian Model Selection and Model Averaging. +The `FBMS` package provides functions for Flexible Bayesian Model Selection and Model Averaging. ```{r, include = FALSE} knitr::opts_chunk$set( @@ -16,46 +16,7 @@ knitr::opts_chunk$set( comment = "#>" ) ``` -# Examples -Below are provided examples on how to run the algorithm and what the results tell us, we begin by loading the package and a supplied dataset -```{r setup} -library(FBMS) -library(GenSA) -library(fastglm) -data("breastcancer") -bc <- breastcancer[,c(ncol(breastcancer),2:(ncol(breastcancer)-1))] -``` -We need some nonlinear transformations for the algorithm to use, the package offers a selection of these, but you are also able to define your own. Here we create a list of the ones to use, all but one are supplied by the package. -```{r} -to3 <- function(x) x^3 -transforms <- c("sigmoid","sin_deg","exp_dbl","p0","troot","to3") -``` -By calling two functions in the package, a list of probabilities for various parts of the algorithm, as well as a list of parameters are created. The list of probabilities needs the list of transformations to be able to create the vector of probabilities for the different transformations -```{r} -probs <- gen.probs.gmjmcmc(transforms) -params <- gen.params.gmjmcmc(bc) -``` -We can use one of the supplied likelihood functions, but here we demonstrate how to create our own, it takes four arguments, the dependent $y$ variable, the matrix $X$ containing all independent variables, the model as a logical vector specifying the columns of $X$, and a list of complexity measures for the features involved in the model -```{r} -loglik.example <- function (y, x, model, complex, params) { - r <- 20/223 - suppressWarnings({mod <- fastglm(as.matrix(x[,model]), y, family=binomial())}) - ret <- (-(mod$deviance -2*log(r)*sum(complex$width)))/2 - return(list(crit=ret, coefs=mod$coefficients)) -} -``` -To be able to calculate the alphas when using for example strategy 3 as per Hubin et al., we need a function for the log likelihood, in this example we will use the function supplied by the package called `logistic.loglik.alpha`. With that function as a starting point, you can also create your own function for this. We also adjust our parameter list to use the third strategy. -```{r} -params$feat$alpha <- "random" -``` +# Examples -We are now ready to run the algorithm, in this vignette we will only run very few iterations for demonstration purposes, but the only thing that needs to be changed are the number or populations to visit `T`, the number of iterations per population `N` and the number of iterations for the final population `N.final` -```{r} -set.seed(1234) -result <- gmjmcmc(bc, loglik.example, logistic.loglik.alpha, transforms, P=3, N=30, N.final=60, probs, params) -``` -We can then summarize the results using the supplied function and get a plot of the importance of the parameters in the last population of features -```{r, fig.width=6, fig.height=6} -plot(result) -``` +Must be provided based on test_currect folder