From 477bee6e47e0848a1186809227567e3166318356 Mon Sep 17 00:00:00 2001 From: jonlachmann Date: Mon, 17 Mar 2025 08:43:25 +0100 Subject: [PATCH 01/36] * Take data on the format x, y and number of fixed variables instead of assuming y has dimension n*1 and always adding an intercept. --- R/arguments.R | 5 +- R/fbms.R | 62 ++++++++--------- R/feature.R | 2 +- R/feature_generation.R | 22 +++--- R/gmjmcmc.R | 10 +-- R/gmjmcmc_support.R | 44 ++++++------ R/local_optim.R | 10 ++- R/mjmcmc.R | 15 ++-- R/parallel.R | 6 +- R/predict.R | 128 ++++++++--------------------------- R/results.R | 18 ++--- tests/testthat/test_mjmcmc.R | 8 +-- 12 files changed, 128 insertions(+), 202 deletions(-) diff --git a/R/arguments.R b/R/arguments.R index 854b4f2..812a9c0 100644 --- a/R/arguments.R +++ b/R/arguments.R @@ -191,8 +191,7 @@ gen.params.mjmcmc <- function (data) { ### Create a list of parameters for the algorithm ## Get the dimensions of the data to set parameters based on it - data.dim <- data.dims(data) - ncov <- data.dim[2] - 2 + ncov <- ncol(data$x) - data$fixed ## Local optimization parameters sa_kern <- list(probs=c(0.1, 0.05, 0.2, 0.3, 0.2, 0.15), @@ -317,7 +316,7 @@ gen.params.gmjmcmc <- function (data) { # Get mjmcmc params params <- gen.params.mjmcmc(data) - ncov <- ncol(data) - 2 + ncov <- ncol(data$x) - data$fixed feat_params <- list(D = 5, L = 15, # Hard limits on feature complexity alpha = "unit", # alpha strategy ("unit" = None, "deep" strategy 3 from Hubin et al., "random" fully Bayesian strategy) diff --git a/R/fbms.R b/R/fbms.R index c4e4e60..2a99c5f 100644 --- a/R/fbms.R +++ b/R/fbms.R @@ -62,62 +62,62 @@ fbms <- function(formula = NULL, family = "gaussian", data = NULL, impute = FALS Y <- model.response(mf, "any") X <- model.matrix(formula, data = data)[, -1] + intercept <- attr(terms(formula, data = data), "intercept") + if (intercept) X <- cbind(1, X) mis.Y <- which(is.na(Y)) - if(length(mis.Y)>0) - { + if (length(mis.Y) > 0) { warning("Missing values in the response. Dropped.") - df <- data.frame(Y[-c(mis.Y)], X[-c(mis.Y),]) - } else df <- data.frame(Y, X) + Y <- Y[-c(mis.Y)] + X <- X[-c(mis.Y), ] + } - mis.All <- sum(is.na(df)) + mis.X <- sum(is.na(X)) imputed <- NULL - if(impute & mis.All>0) - { + if (impute && mis.X > 0) { print("Imputing missing values!") - na.matr <- data.frame(1*(is.na(df))) - names(na.matr) <- paste0("mis_",names(na.matr)) + na.matr <- data.frame(1 * (is.na(X))) + names(na.matr) <- paste0("mis_", names(na.matr)) cm <- colMeans(na.matr) - na.matr <- na.matr[,cm!=0] - for (i in seq_along(df)){ - df[[i]][is.na(df[[i]])] <- median(df[[i]], na.rm = TRUE) + na.matr <- na.matr[, cm != 0] + for (i in seq_along(X)) { + X[[i]][is.na(X[[i]])] <- median(X[[i]], na.rm = TRUE) } - imputed <- names(df)[cm!=0] - df <- data.frame(df,na.matr) + imputed <- names(X)[cm != 0] + X <- data.frame(X, na.matr) rm(na.matr) rm(cm) print("Continue to sampling!") - } else if(mis.All>0){ + } else if (mis.X > 0) { print("Dropping missing values!") } } else { - df <- data + Y <- data[, 1] + X <- cbind(1, data[, -1]) + intercept <- TRUE imputed <- NULL na.opt <- getOption("na.action") - if(impute) - { - options(na.action='na.pass') + if (impute) { + options(na.action = 'na.pass') stop("Imputation is only implemented when formula is provided.\n Please specify formula and rerun!") + } else { + options(na.action = 'na.omit') } - else - options(na.action='na.omit') } if (method == "mjmcmc.parallel") - res <- mjmcmc.parallel(df, loglik.pi, verbose = verbose, ...) + res <- mjmcmc.parallel(X, Y, loglik.pi, fixed = intercept, verbose = verbose, ...) else if (method == "mjmcmc") - res <- mjmcmc(df, loglik.pi, verbose = verbose, ...) - else if (method == "gmjmcmc.parallel") { - res <- gmjmcmc.parallel(data = df, loglik.pi = loglik.pi, verbose = verbose,...) - } - + res <- mjmcmc(X, Y, loglik.pi, fixed = intercept, verbose = verbose, ...) + else if (method == "gmjmcmc.parallel") + res <- gmjmcmc.parallel(x = X, y = Y, loglik.pi = loglik.pi, fixed = intercept, verbose = verbose,...) else if (method == "gmjmcmc") - res <- gmjmcmc(df, loglik.pi, verbose = verbose, ...) + res <- gmjmcmc(X, Y, loglik.pi, fixed = intercept, verbose = verbose, ...) else - stop("Error: Method must be one of gmjmcmc, gmjmcmc.parallel,mjmcmc or mjmcmc.parallel!") + stop("Error: Method must be one of gmjmcmc, gmjmcmc.parallel, mjmcmc or mjmcmc.parallel!") attr(res, "imputed") <- imputed - attr(res, "all_names") <- names(df)[1:(dim(df)[2]-1)] - options(na.action=na.opt) + attr(res, "all_names") <- names(X)[1:(dim(X)[2] - 1)] + options(na.action = na.opt) return(res) } \ No newline at end of file diff --git a/R/feature.R b/R/feature.R index 0807175..8226e72 100644 --- a/R/feature.R +++ b/R/feature.R @@ -152,7 +152,7 @@ print.feature <- function (x, dataset = FALSE, alphas = FALSE, labels = FALSE, r } # This is a plain covariate else if (is.numeric(feat)) { - if (dataset) fString <- paste0("data[,", feat + 2, "]") + if (dataset) fString <- paste0("data$x[,", feat, "]") else if (labels[1] != F) fString <- labels[feat] else fString <- paste0("x", feat) } else stop("Invalid feature structure") diff --git a/R/feature_generation.R b/R/feature_generation.R index 999384c..1354012 100644 --- a/R/feature_generation.R +++ b/R/feature_generation.R @@ -75,26 +75,30 @@ check.collinearity <- function (proposal, features, F.0.size, data, mock) { # Add the proposal to the feature list for evaluation features[[length(features) + 1]] <- proposal # Generate mock data to test with (avoiding too costly computations) - if (mock) - mock.data <- matrix(c(runif((F.0.size * 2), -100, 100), rep(1, F.0.size * 2), - runif((F.0.size * 2) * (F.0.size), -100, 100)), F.0.size * 2, F.0.size + 2) - else - mock.data <- check.data(data[seq_len(min(F.0.size * 2, dim(data)[1])), ], FALSE) + n <- F.0.size * 2 + if (mock) { + mock.data <- list(x = matrix(runif(n * (F.0.size), -100, 100), n, F.0.size), + y = matrix(runif(n * (ncol(data$y)), -100, 100), n, ncol(data$y)), + fixed = data$fixed) + } else { + obs_idx <- seq_len(min(n, nrow(data$x))) + mock.data <- check.data(data$x[obs_idx, ], data$y[obs_idx, ], data$fixed, FALSE) + } # Use the mock data to precalc the features mock.data.precalc <- precalc.features(mock.data, features) # Fit a linear model with the mock data precalculated features - linearmod <- lm(as.data.frame(mock.data.precalc[, -2])) + linearmod <- lm.fit(mock.data.precalc$x, mock.data.precalc$y) # Check if all coefficients were possible to calculate if (sum(is.na(linearmod$coefficients)) == 0) return(FALSE) else return(TRUE) } # Generate features to represent the covariates, just takes the count needed -gen.covariates <- function (count) { +gen.covariates <- function (data) { features <- list() - for (i in 1:count) { + for (i in (data$fixed + 1):ncol(data$x)) { features <- c(features, i) - class(features[[i]]) <- "feature" + class(features[[length(features)]]) <- "feature" } return(features) } \ No newline at end of file diff --git a/R/gmjmcmc.R b/R/gmjmcmc.R index 5cd999d..10fb2b3 100644 --- a/R/gmjmcmc.R +++ b/R/gmjmcmc.R @@ -46,10 +46,12 @@ NULL #' #' @export gmjmcmc gmjmcmc <- function ( - data, + x, + y, loglik.pi = gaussian.loglik, loglik.alpha = gaussian.loglik.alpha, transforms, + fixed = 0, P = 10, N.init = 100, N.final = 100, @@ -59,8 +61,7 @@ gmjmcmc <- function ( verbose = TRUE ) { # Verify that the data is well-formed - labels <- names(data)[-1] - data <- check.data(data, verbose) + data <- check.data(x, y, fixed, verbose) # Generate default probabilities and parameters if there are none supplied. if (is.null(probs)) probs <- gen.probs.gmjmcmc(transforms) @@ -88,7 +89,7 @@ gmjmcmc <- function ( best.margs <- vector("list", P) # Create first population - F.0 <- gen.covariates(ncol(data) - 2) + F.0 <- gen.covariates(data) if (is.null(params$prel.select)) S[[1]] <- F.0 else @@ -160,6 +161,7 @@ gmjmcmc <- function ( transforms = transforms # Transformations used by the model ) results$labels <- labels + results$fixed <- fixed attr(results, "class") <- "gmjmcmc" return(results) } diff --git a/R/gmjmcmc_support.R b/R/gmjmcmc_support.R index db52330..70aa584 100644 --- a/R/gmjmcmc_support.R +++ b/R/gmjmcmc_support.R @@ -96,20 +96,20 @@ marginal.probs.renorm <- function (models, type = "features") { # Function for precalculating features for a new feature population precalc.features <- function (data, features) { - precalc <- matrix(NA, nrow(data), length(features) + 2) - precalc[, 1:2] <- data[, 1:2] + precalc <- matrix(NA, nrow(data$x), length(features)) for (f in seq_along(features)) { feature_string <- print.feature(features[[f]], dataset = TRUE) - precalc[, (f + 2)] <- eval(parse(text = feature_string)) + precalc[, f] <- eval(parse(text = feature_string)) } # Replace any -Inf and Inf values caused by under- or overflow precalc <- replace.infinite.data.frame(precalc) - return(precalc) + data$x <- cbind(data$x[, seq_len(data$fixed)], precalc) + return(data) } # TODO: Compare to previous mliks here instead, also add a flag to do that in full likelihood estimation scenarios. # Function to call the model function -loglik.pre <- function (loglik.pi, model, complex, data, params = NULL, visited.models = visited.models, sub = sub) { +loglik.pre <- function (loglik.pi, model, complex, data, params = NULL, visited.models, sub) { if (!is.null(visited.models) && has_key(visited.models, model)) { if (!sub) { return(visited.models[[model]]) @@ -121,7 +121,7 @@ loglik.pre <- function (loglik.pi, model, complex, data, params = NULL, visited. # Get the complexity measures for just this model complex <- list(width = complex$width[model], oc = complex$oc[model], depth = complex$depth[model]) # Call the model estimator with the data and the model, note that we add the intercept to every model - model.res <- loglik.pi(data[, 1], data[, -1], c(T, model), complex, params) + model.res <- loglik.pi(data$y, data$x, c(rep(TRUE, data$fixed), model), complex, params) # Check that the critical value is acceptable if (!is.numeric(model.res$crit) || is.nan(model.res$crit)) model.res$crit <- -.Machine$double.xmax # Alpha cannot be calculated if the current and proposed models have crit which are -Inf or Inf @@ -135,34 +135,28 @@ loglik.pre <- function (loglik.pi, model, complex, data, params = NULL, visited. # Function to check the data # Checks that there is an intercept in the data, adds it if missing # Coerces the data to be of type matrix -check.data <- function (data, verbose) { - if (!is.matrix(data)) { - data <- as.matrix(data) - if (verbose) cat("Data coerced to matrix type.\n") +check.data <- function (x, y, fixed, verbose) { + if (!is.matrix(x)) { + x <- as.matrix(x) + if (verbose) cat("Data (x) coerced to matrix type.\n") } - if (sum(data[, 2] == 1) != nrow(data)) { - data <- cbind(data[, 1], 1, data[, -1]) - if (verbose) cat("Intercept added to data.\n") + if (!is.matrix(y)) { + y <- as.matrix(y) + if (verbose) cat("Data (y) coerced to matrix type.\n") } - return(data) -} - -# Function to get the dimensions of a dataset, adding an intercept if necessary -data.dims <- function (data) { - dims <- dim(data) - if (sum(data[,2] == 1) != nrow(data)) { - dims[2] <- dims[2] + 1 + if (nrow(x) != nrow(y)) { + stop("x and y must have the same number of rows") } - return(dims) + return(list(x = x, y = y, fixed = fixed)) } # Function to extract column names if they are well formed get.labels <- function (data, verbose) { - labels <- colnames(data)[-(1:2)] - if (is.null(labels)) return(F) + labels <- colnames(data$x)[seq.int(from = data$fixed + 1, ncol(data$x))] + if (is.null(labels)) return(FALSE) if (sum(is.na(labels)) != 0) { if (verbose) cat("NA labels present, using x#\n") - return(F) + return(FALSE) } return(labels) } \ No newline at end of file diff --git a/R/local_optim.R b/R/local_optim.R index 28696f0..ab11489 100644 --- a/R/local_optim.R +++ b/R/local_optim.R @@ -13,10 +13,9 @@ simulated.annealing <- function (model, data, loglik.pi, indices, complex, param temp <- params$t.init # Initial temperature # Calculate current likelihood - model.res <- loglik.pre(loglik.pi, model, complex, data, loglikparams,visited.models, sub) + model.res <- loglik.pre(loglik.pi, model, complex, data, loglikparams, visited.models, sub) model.lik <- model.res$crit - models[[length(models) + 1]] <- list(prob=NA, model=model, coefs=model.res$coefs, crit=model.lik, alpha=NA) - # print(paste("SA Start:", model.lik)) + models[[length(models) + 1]] <- list(prob = NA, model = model, coefs = model.res$coefs, crit = model.lik, alpha = NA) while (temp > params$t.min) { # Make M tries at current temperature for (m in 1:params$M) { @@ -25,7 +24,7 @@ simulated.annealing <- function (model, data, loglik.pi, indices, complex, param model.proposal <- loglik.pre(loglik.pi, proposal, complex, data, loglikparams, visited.models = visited.models, sub = sub) proposal.lik <- model.proposal$crit # Store the model that we have calculated - models[[length(models) + 1]] <- list(prob=NA, model=proposal, coefs=model.proposal$coefs, crit=proposal.lik, alpha=NA) + models[[length(models) + 1]] <- list(prob = NA, model = proposal, coefs = model.proposal$coefs, crit = proposal.lik, alpha = NA) # Calculate move probability for negative steps (Bolzmann distribution, see Blum and Roli p. 274) if (proposal.lik > model.lik) alpha <- 1 else alpha <- min(1, exp((proposal.lik - model.lik) / temp)) @@ -38,8 +37,7 @@ simulated.annealing <- function (model, data, loglik.pi, indices, complex, param # Update temperature temp <- temp * exp(-params$dt) } - # print(paste("SA Finish:", model.lik)) - return(list(model=model, kern=kernel, models=models)) + return(list(model = model, kern = kernel, models = models)) } greedy.optim <- function (model, data, loglik.pi, indices, complex, params, loglikparams, kernel = NULL, visited.models = NULL, sub = FALSE) { diff --git a/R/mjmcmc.R b/R/mjmcmc.R index 8e6e57b..445ea95 100644 --- a/R/mjmcmc.R +++ b/R/mjmcmc.R @@ -31,10 +31,10 @@ #' plot(result) #' #' @export mjmcmc -mjmcmc <- function (data, loglik.pi = gaussian.loglik, N = 100, probs = NULL, params = NULL, sub = FALSE, verbose = TRUE) { +mjmcmc <- function (x, y, loglik.pi = gaussian.loglik, fixed = 0, N = 100, probs = NULL, params = NULL, sub = FALSE, verbose = TRUE) { # Verify that data is well-formed - labels <- names(data)[-1] - data <- check.data(data, verbose) + labels <- names(x) + data <- check.data(x, y, fixed, verbose) # Generate default probabilities and parameters if there are none supplied. if (is.null(probs)) probs <- gen.probs.mjmcmc() @@ -44,13 +44,13 @@ mjmcmc <- function (data, loglik.pi = gaussian.loglik, N = 100, probs = NULL, pa accept <- 0 # Create a population of just the covariates - S <- gen.covariates(ncol(data)-2) + S <- gen.covariates(data) complex <- complex.features(S) # Initialize first model model.cur <- as.logical(rbinom(n = length(S), size = 1, prob = 0.5)) - model.cur.res <- loglik.pre(loglik.pi, model.cur, complex, data, params$loglik, visited.models=NULL, sub = sub) - model.cur <- list(prob=0, model=model.cur, coefs=model.cur.res$coefs, crit=model.cur.res$crit, alpha=0) + model.cur.res <- loglik.pre(loglik.pi, model.cur, complex, data, params$loglik, visited.models = NULL, sub = sub) + model.cur <- list(prob = 0, model = model.cur, coefs = model.cur.res$coefs, crit = model.cur.res$crit, alpha = 0) if (verbose) cat("\nMJMCMC begin.\n") result <- mjmcmc.loop(data, complex, loglik.pi, model.cur, N, probs, params, sub, verbose) @@ -60,6 +60,7 @@ mjmcmc <- function (data, loglik.pi = gaussian.loglik, N = 100, probs = NULL, pa result$populations <- S # Return formatted results + result$fixed <- data$fixed result$labels <- labels class(result) <- "mjmcmc" return(result) @@ -92,7 +93,7 @@ mjmcmc.loop <- function (data, complex, loglik.pi, model.cur, N, probs, params, # Acceptance count accept <- 0 # Number of covariates or features - covar_count <- ncol(data) - 2 + covar_count <- ncol(data$x) # A list of models that have been visited models <- vector("list", N) # Initialize a vector to contain local opt visited models diff --git a/R/parallel.R b/R/parallel.R index 839d6b7..3c61b0c 100644 --- a/R/parallel.R +++ b/R/parallel.R @@ -126,11 +126,11 @@ mjmcmc.parallel <- function(runs = 2, cores = getOption("mc.cores", 2L), ...) { #' plot(result) #' #' @export -gmjmcmc.parallel <- function(runs = 2, cores = getOption("mc.cores", 2L), merge.options = list(populations = "best", complex.measure = 2, tol = 0.0000001), data, loglik.pi = gaussian.loglik, loglik.alpha = gaussian.loglik.alpha, transforms, ...) { +gmjmcmc.parallel <- function(runs = 2, cores = getOption("mc.cores", 2L), merge.options = list(populations = "best", complex.measure = 2, tol = 0.0000001), x, y, loglik.pi = gaussian.loglik, loglik.alpha = gaussian.loglik.alpha, transforms, ...) { options("gmjmcmc-transformations" = transforms) - results <- rmclapply(seq_len(runs), args = list(data = data, loglik.pi = loglik.pi, loglik.alpha = loglik.alpha, transforms = transforms, ...), mc.cores = cores, fun = gmjmcmc) + results <- rmclapply(seq_len(runs), args = list(x = x, y = y, loglik.pi = loglik.pi, loglik.alpha = loglik.alpha, transforms = transforms, ...), mc.cores = cores, fun = gmjmcmc) class(results) <- "gmjmcmc_parallel" - merged <- merge_results(results, merge.options$populations, merge.options$complex.measure, merge.options$tol, data = data) + merged <- merge_results(results, merge.options$populations, merge.options$complex.measure, merge.options$tol, data = list(x = x, y = y)) gc() return(merged) } diff --git a/R/predict.R b/R/predict.R index 3171f73..f375a5d 100644 --- a/R/predict.R +++ b/R/predict.R @@ -70,25 +70,9 @@ predict.bgnlm_model <- function(object, x, link = function(x) { x }, ... ) { #' @export predict.gmjmcmc <- function (object, x, link = function(x) x, quantiles = c(0.025, 0.5, 0.975), pop = NULL,tol = 0.0000001, ...) { transforms.bak <- set.transforms(object$transforms) + x <- impute_x(object, x) - - - if(!is.null(attr(object,which = "imputed"))) - { - df <- data.frame(x) - na.matr <- data.frame(1*(is.na(df))) - cm <- colMeans(na.matr) - na.matr <- na.matr[,attr(object,which = "imputed")] - names(na.matr) <- paste0("mis_",names(na.matr)) - for (i in which(cm!=0)){ - df[[i]][is.na(df[[i]])] <- median(df[[i]], na.rm = TRUE) - } - x <- as.matrix(data.frame(df,na.matr)) - rm(df) - rm(na.matr) - } else x <- as.matrix(x) - - merged <- merge_results(list(object),data = cbind(1, x), populations = pop, tol = tol) + merged <- merge_results(list(object), data = list(x = x), populations = pop, tol = tol) set.transforms(transforms.bak) return(predict.gmjmcmc_merged(merged, x, link, quantiles)) } @@ -100,21 +84,7 @@ predict.gmjmcmc <- function (object, x, link = function(x) x, quantiles = c(0.02 #' @noRd predict.gmjmcmc.2 <- function (object, x, link = function(x) x, quantiles = c(0.025, 0.5, 0.975), pop = 1, ...) { transforms.bak <- set.transforms(object$transforms) - - if(!is.null(attr(object,which = "imputed"))) - { - df <- data.frame(x) - na.matr <- data.frame(1*(is.na(df))) - cm <- colMeans(na.matr) - na.matr <- na.matr[,attr(object,which = "imputed")] - names(na.matr) <- paste0("mis_",names(na.matr)) - for (i in which(cm!=0)){ - df[[i]][is.na(df[[i]])] <- median(df[[i]], na.rm = TRUE) - } - x <- as.matrix(data.frame(df,na.matr)) - rm(df) - rm(na.matr) - } else x <- as.matrix(x) + x <- impute_x(object, x) mmodel <- lapply(object[1:8], function (x) x[[pop]]) @@ -153,23 +123,10 @@ predict.gmjmcmc.2 <- function (object, x, link = function(x) x, quantiles = c(0. #' #' @export predict.gmjmcmc_merged <- function (object, x, link = function(x) x, quantiles = c(0.025, 0.5, 0.975), pop = NULL,tol = 0.0000001, ...) { - if(!is.null(attr(object,which = "imputed"))) - { - df <- data.frame(x) - na.matr <- data.frame(1*(is.na(df))) - cm <- colMeans(na.matr) - na.matr <- na.matr[,attr(object,which = "imputed")] - names(na.matr) <- paste0("mis_",names(na.matr)) - for (i in which(cm!=0)){ - df[[i]][is.na(df[[i]])] <- median(df[[i]], na.rm = TRUE) - } - x <- as.matrix(data.frame(df,na.matr)) - rm(df) - rm(na.matr) - } else x <- as.matrix(x) + x <- impute_x(object, x) transforms.bak <- set.transforms(object$transforms) - if(!is.null(pop)) + if (!is.null(pop)) object <- merge_results(object$results.raw, pop, 2, tol, data = x) preds <- list() @@ -181,14 +138,14 @@ predict.gmjmcmc_merged <- function (object, x, link = function(x) x, quantiles = features <- object$results[[i]]$populations[[j]] model.probs <- object$results[[i]]$model.probs[[j]] - # Precalculate the features for the new data (c(0,1...) is because precalc features thinks there is an intercept and y col). - x.precalc <- precalc.features(cbind(0, 1, x), features)[, -1] + # Precalculate the features for the new data + x.precalc <- precalc.features(list(x = x, fixed = object$fixed), features)$x yhat <- matrix(0, nrow=nrow(x), ncol=length(models)) for (k in seq_along(models)) { # Models which have 0 weight are skipped since they may also be invalid, and would not influence the predictions. if (models[[k]]$crit == -.Machine$double.xmax) next - yhat[, k] <- link(x.precalc[, c(TRUE, models[[k]]$model), drop=FALSE] %*% models[[k]]$coefs) + yhat[, k] <- link(x.precalc[, c(rep(TRUE, object$fixed), models[[k]]$model), drop=FALSE] %*% models[[k]]$coefs) } mean.pred <- rowSums(yhat %*% diag(as.numeric(model.probs))) @@ -225,24 +182,7 @@ predict.gmjmcmc_merged <- function (object, x, link = function(x) x, quantiles = #' @export predict.mjmcmc <- function (object, x, link = function(x) x, quantiles = c(0.025, 0.5, 0.975), ...) { # Select the models and features to predict from at this iteration - - if(!is.null(attr(object,which = "imputed"))) - { - df <- data.frame(x) - na.matr <- data.frame(1*(is.na(df))) - cm <- colMeans(na.matr) - na.matr <- na.matr[,attr(object,which = "imputed")] - names(na.matr) <- paste0("mis_",names(na.matr)) - for (i in which(cm!=0)){ - df[[i]][is.na(df[[i]])] <- median(df[[i]], na.rm = TRUE) - } - x <- as.matrix(data.frame(df,na.matr)) - rm(df) - rm(na.matr) - } else x <- as.matrix(x) - - x <- as.matrix(cbind(rep(1, dim(x)[1]), x)) - + x <- impute_x(object, x) models <- c(object$models, object$lo.models)[object$model.probs.idx] @@ -250,7 +190,7 @@ predict.mjmcmc <- function (object, x, link = function(x) x, quantiles = c(0.025 for (k in seq_along(models)) { # Models which have 0 weight are skipped since they may also be invalid, and would not influence the predictions. if (models[[k]]$crit == -.Machine$double.xmax) next - yhat[, k] <- link(x[, c(TRUE, models[[k]]$model), drop=FALSE] %*% models[[k]]$coefs) + yhat[, k] <- link(x[, c(rep(TRUE, object$fixed), models[[k]]$model), drop=FALSE] %*% models[[k]]$coefs) } mean.pred <- rowSums(yhat %*% diag(as.numeric(object$model.probs))) @@ -273,21 +213,7 @@ predict.mjmcmc <- function (object, x, link = function(x) x, quantiles = c(0.025 #' @export predict.mjmcmc_parallel <- function (object, x, link = function(x) x, quantiles = c(0.025, 0.5, 0.975), ...) { max.crits <- numeric() - - if(!is.null(attr(object,which = "imputed"))) - { - df <- data.frame(x) - na.matr <- data.frame(1*(is.na(df))) - cm <- colMeans(na.matr) - na.matr <- na.matr[,attr(object,which = "imputed")] - names(na.matr) <- paste0("mis_",names(na.matr)) - for (i in which(cm!=0)){ - df[[i]][is.na(df[[i]])] <- median(df[[i]], na.rm = TRUE) - } - x <- as.matrix(data.frame(df,na.matr)) - rm(df) - rm(na.matr) - } else x <- as.matrix(x) + x <- impute_x(object, x) for (i in seq_along(object)) { max.crits <- c(max.crits, object[[i]]$best.crit) @@ -335,22 +261,7 @@ predict.mjmcmc_parallel <- function (object, x, link = function(x) x, quantiles #' @export predict.gmjmcmc_parallel <- function (object, x, link = function(x) x, quantiles = c(0.025, 0.5, 0.975), ...) { transforms.bak <- set.transforms(object$transforms) - - if(!is.null(attr(object,which = "imputed"))) - { - df <- data.frame(x) - na.matr <- data.frame(1*(is.na(df))) - cm <- colMeans(na.matr) - na.matr <- na.matr[,attr(object,which = "imputed")] - names(na.matr) <- paste0("mis_",names(na.matr)) - for (i in which(cm!=0)){ - df[[i]][is.na(df[[i]])] <- median(df[[i]], na.rm = TRUE) - } - x <- as.matrix(data.frame(df,na.matr)) - rm(df) - rm(na.matr) - } else x <- as.matrix(x) - + x <- impute_x(object, x) merged <- merge_results(object,data = cbind(1, x), ...) results <- predict.gmjmcmc_merged(merged, x, link, quantiles) set.transforms(transforms.bak) @@ -375,3 +286,18 @@ weighted.quantiles <- function (values, weights, prob = c(0.025, 0.975)) { } {values[ordered]}[iv] } + +impute_x <- function (object, x) { + if (!is.null(attr(object, which = "imputed"))) { + df <- data.frame(x) + na.matr <- data.frame(1 * (is.na(df))) + cm <- colMeans(na.matr) + na.matr <- na.matr[, attr(object, which = "imputed")] + names(na.matr) <- paste0("mis_", names(na.matr)) + for (i in which(cm != 0)){ + df[[i]][is.na(df[[i]])] <- median(df[[i]], na.rm = TRUE) + } + return(as.matrix(data.frame(df,na.matr))) + } + return(x) +} diff --git a/R/results.R b/R/results.R index fdc1681..2d153e6 100644 --- a/R/results.R +++ b/R/results.R @@ -107,7 +107,7 @@ merge_results <- function (results, populations = NULL, complex.measure = NULL, accept.tot <- results[[i]]$accept.tot best <- results[[i]]$best for (item in names(results[[i]])) { - if (!(item %in% (c("accept.tot", "best", "transforms")))) results[[i]][[item]] <- results[[i]][[item]][pops.use[[i]]] + if (!(item %in% (c("accept.tot", "best", "transforms", "fixed")))) results[[i]][[item]] <- results[[i]][[item]][pops.use[[i]]] } results[[i]]$accept.tot <- accept.tot results[[i]]$best <- best @@ -126,10 +126,11 @@ merge_results <- function (results, populations = NULL, complex.measure = NULL, ## Detect equivalent features # Generate mock data to compare features with - if (is.null(data)) mock.data <- matrix(runif((feat.count + 2)^2, -100, 100), ncol = feat.count + 2) - else mock.data <- check.data(data, FALSE) + if (is.null(data)) mock.data <- list(x = matrix(runif((feat.count)^2, -100, 100), ncol = feat.count)) + else mock.data <- data + mock.data$fixed = results[[1]]$fixed - mock.data.precalc <- precalc.features(mock.data, features)[,-(1:2)] + mock.data.precalc <- precalc.features(mock.data, features)$x[ , seq_len(feat.count) + results[[1]]$fixed, drop = FALSE] # Calculate the correlation to find equivalent features cors <- cor(mock.data.precalc) @@ -141,10 +142,10 @@ merge_results <- function (results, populations = NULL, complex.measure = NULL, for (i in seq_len(nrow(cors))) { equiv.feats <- which(cors[i, ] >= (1 - tol)) # Compare equivalent features complexity to find most simple - equiv.complex <- list(width=complex$width[equiv.feats], oc=complex$oc[equiv.feats], depth=complex$depth[equiv.feats]) + equiv.complex <- list(width = complex$width[equiv.feats], oc = complex$oc[equiv.feats], depth = complex$depth[equiv.feats]) equiv.simplest <- lapply(equiv.complex, which.min) - feats.map[1:3,equiv.feats] <- c(equiv.feats[equiv.simplest$width], equiv.feats[equiv.simplest$oc], equiv.feats[equiv.simplest$depth]) - feats.map[4,equiv.feats] <- sum(renorms[equiv.feats]) + feats.map[1:3, equiv.feats] <- c(equiv.feats[equiv.simplest$width], equiv.feats[equiv.simplest$oc], equiv.feats[equiv.simplest$depth]) + feats.map[4, equiv.feats] <- sum(renorms[equiv.feats]) } # Select the simplest features based on the specified complexity measure and sort them feats.simplest.ids <- unique(feats.map[complex.measure, ]) @@ -165,7 +166,8 @@ merge_results <- function (results, populations = NULL, complex.measure = NULL, rep.pop = pw$pop.best, best.log.posteriors = bests, rep.thread = pw$thread.best, - transforms = results[[1]]$transforms + transforms = results[[1]]$transforms, + fixed = results[[1]]$fixed ) attr(merged, "class") <- "gmjmcmc_merged" return(merged) diff --git a/tests/testthat/test_mjmcmc.R b/tests/testthat/test_mjmcmc.R index 0257d46..5b86b94 100644 --- a/tests/testthat/test_mjmcmc.R +++ b/tests/testthat/test_mjmcmc.R @@ -17,22 +17,22 @@ test_that("Testing MJMCMC algorithm", { } data <- matrix(rnorm(600), 100) - resm <- mjmcmc(data, loglik.tester) + resm <- mjmcmc(data[,-1], data[,1], loglik.tester, 1) summary(resm, labels = c("a", "b", "c", "d", "e")) plot(resm) predm <- predict(resm, data[, -1, drop = FALSE]) - resg <- gmjmcmc(data, loglik.tester, NULL, c("p0", "exp_dbl")) + resg <- gmjmcmc(data[,-1], data[,1], loglik.tester, NULL, c("p0", "exp_dbl"), 1) summary(resg) plot(resg) prediction <- predict(resg, data[, -1, drop = FALSE]) - respm <- mjmcmc.parallel(2, 2, data, loglik.tester) + respm <- mjmcmc.parallel(2, 2, data[,-1], data[,1], loglik.tester) summary(respm) plot(respm) pred_pm <- predict(respm, data[, -1, drop = FALSE]) - respg <- gmjmcmc.parallel(2, 2, NULL, data, loglik.tester, NULL, c("p0", "exp_dbl")) + respg <- gmjmcmc.parallel(2, 2, NULL, data[,-1], data[,1], loglik.tester, NULL, c("p0", "exp_dbl")) summary(respg) plot(respg) pred_pg <- predict(respg, data[, -1, drop = FALSE]) -- GitLab From 4f31fcfabcdc3762a67b5cf13a3b4b0efbb0a7e4 Mon Sep 17 00:00:00 2001 From: jonlachmann Date: Mon, 24 Mar 2025 20:29:56 +0100 Subject: [PATCH 02/36] WIP --- vignettes/FBMS-guide.Rmd | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/vignettes/FBMS-guide.Rmd b/vignettes/FBMS-guide.Rmd index c95c0c2..91c2bcc 100644 --- a/vignettes/FBMS-guide.Rmd +++ b/vignettes/FBMS-guide.Rmd @@ -20,10 +20,11 @@ knitr::opts_chunk$set( Below are provided examples on how to run the algorithm and what the results tell us, we begin by loading the package and a supplied dataset ```{r setup} library(FBMS) -library(GenSA) -library(fastglm) +#library(GenSA) +#library(fastglm) data("breastcancer") bc <- breastcancer[,c(ncol(breastcancer),2:(ncol(breastcancer)-1))] +bc <- cbind(bc[, 1], 1, bc[, -1]) ``` We need some nonlinear transformations for the algorithm to use, the package offers a selection of these, but you are also able to define your own. Here we create a list of the ones to use, all but one are supplied by the package. ```{r} @@ -53,7 +54,7 @@ params$feat$alpha <- "random" We are now ready to run the algorithm, in this vignette we will only run very few iterations for demonstration purposes, but the only thing that needs to be changed are the number or populations to visit `T`, the number of iterations per population `N` and the number of iterations for the final population `N.final` ```{r} set.seed(1234) -result <- gmjmcmc(bc, loglik.example, logistic.loglik.alpha, transforms, P=3, N=30, N.final=60, probs, params) +result <- gmjmcmc(bc[, -1], bc[, 1], fixed = 1, loglik.example, logistic.loglik.alpha, transforms, P=3, N=30, N.final=60, probs, params) ``` We can then summarize the results using the supplied function and get a plot of the importance of the parameters in the last population of features ```{r, fig.width=6, fig.height=6} -- GitLab From 3171a902721b9ba19bbc82de085cf8fbcfc61548 Mon Sep 17 00:00:00 2001 From: jonlachmann Date: Mon, 24 Mar 2025 21:52:48 +0100 Subject: [PATCH 03/36] * Work in progress - should the intercept be mandatory? --- R/arguments.R | 2 +- R/fbms.R | 29 +++++++++++++++---- R/gmjmcmc.R | 25 +++++++--------- R/gmjmcmc_support.R | 2 +- R/likelihoods.R | 9 ++---- R/local_optim.R | 4 +-- R/mjmcmc.R | 7 +++-- R/parallel.R | 15 ++++++++-- R/predict.R | 4 +-- tests_current/Ex10_Sec6_2.R | 12 ++++---- tests_current/Ex11_Sec6_3.R | 8 ++--- tests_current/Ex12_Sec6_4.R | 4 +-- tests_current/Ex13_Sec6_5.R | 4 +-- tests_current/Ex1_Sec3.R | 26 +++++++++-------- tests_current/Ex3_Sec4_1.R | 2 +- .../Ex3_Sec4_1_Removed, replaced_by_Ex7.R | 6 ++-- tests_current/Ex4_Sec4_2.R | 2 +- tests_current/Ex5_Sec4_3.R | 2 +- tests_current/Ex6_Sec4_4.R | 4 +-- tests_current/Ex8_Sec5.2_logic_regression.R | 24 +++++++-------- tests_current/Ex8_Sec5_2.R | 2 +- tests_current/Ex9_Sec6_1.R | 4 +-- 22 files changed, 110 insertions(+), 87 deletions(-) diff --git a/R/arguments.R b/R/arguments.R index 812a9c0..38b7037 100644 --- a/R/arguments.R +++ b/R/arguments.R @@ -214,7 +214,7 @@ gen.params.mjmcmc <- function (data) { mh_params <- list(neigh.size = 1, neigh.min = 1, neigh.max = 2) # Regular MH parameters ## Compile the list and return params <- list(burn_in=burn_in, mh=mh_params, large=large_params, random=random_params, - sa=sa_params, greedy=greedy_params, loglik=list()) + sa=sa_params, greedy=greedy_params, mlpost=list()) return(params) } diff --git a/R/fbms.R b/R/fbms.R index 2a99c5f..6fb907e 100644 --- a/R/fbms.R +++ b/R/fbms.R @@ -33,9 +33,17 @@ #' #' @seealso \code{\link{mjmcmc}}, \code{\link{gmjmcmc}}, \code{\link{gmjmcmc.parallel}} #' @export -fbms <- function(formula = NULL, family = "gaussian", data = NULL, impute = FALSE, +fbms <- function(formula = NULL, family = "gaussian", + beta_prior = list(type = "g-prior", g = 5), + model_prior = list(r = exp(-0.5)), + data = NULL, impute = FALSE, loglik.pi = gaussian.loglik, method = "mjmcmc", verbose = TRUE, ...) { + if (is.list(beta_prior) || is.list(model_prior)) { + mlpost_params <- gen.mlpost.params(beta_prior, model_prior) + loglik.pi <- select.mlpost.fun(beta_prior, model_prior, family) + } + if (family == "gaussian") loglik.pi <- gaussian.loglik else if (family == "binomial") @@ -106,13 +114,13 @@ fbms <- function(formula = NULL, family = "gaussian", data = NULL, impute = FALS } if (method == "mjmcmc.parallel") - res <- mjmcmc.parallel(X, Y, loglik.pi, fixed = intercept, verbose = verbose, ...) + res <- mjmcmc.parallel(X, Y, loglik.pi, mlpost_params = mlpost_params, fixed = intercept, verbose = verbose, ...) else if (method == "mjmcmc") - res <- mjmcmc(X, Y, loglik.pi, fixed = intercept, verbose = verbose, ...) + res <- mjmcmc(X, Y, loglik.pi, mlpost_params = mlpost_params, fixed = intercept, verbose = verbose, ...) else if (method == "gmjmcmc.parallel") - res <- gmjmcmc.parallel(x = X, y = Y, loglik.pi = loglik.pi, fixed = intercept, verbose = verbose,...) + res <- gmjmcmc.parallel(x = X, y = Y, loglik.pi = loglik.pi, mlpost_params = mlpost_params, fixed = intercept, verbose = verbose,...) else if (method == "gmjmcmc") - res <- gmjmcmc(X, Y, loglik.pi, fixed = intercept, verbose = verbose, ...) + res <- gmjmcmc(X, Y, loglik.pi, mlpost_params = mlpost_params, fixed = intercept, verbose = verbose, ...) else stop("Error: Method must be one of gmjmcmc, gmjmcmc.parallel, mjmcmc or mjmcmc.parallel!") @@ -120,4 +128,13 @@ fbms <- function(formula = NULL, family = "gaussian", data = NULL, impute = FALS attr(res, "all_names") <- names(X)[1:(dim(X)[2] - 1)] options(na.action = na.opt) return(res) -} \ No newline at end of file +} + +gen.mlpost.params <- function (beta_prior, model_prior) { + return(list()) +} + +select.mlpost.fun <- function (beta_prior, model_prior, family) { + return(gaussian.loglik.g) +} + diff --git a/R/gmjmcmc.R b/R/gmjmcmc.R index 10fb2b3..e57eb04 100644 --- a/R/gmjmcmc.R +++ b/R/gmjmcmc.R @@ -50,6 +50,7 @@ gmjmcmc <- function ( y, loglik.pi = gaussian.loglik, loglik.alpha = gaussian.loglik.alpha, + mlpost_params = NULL, transforms, fixed = 0, P = 10, @@ -66,6 +67,7 @@ gmjmcmc <- function ( # Generate default probabilities and parameters if there are none supplied. if (is.null(probs)) probs <- gen.probs.gmjmcmc(transforms) if (is.null(params)) params <- gen.params.gmjmcmc(data) + if (!is.null(mlpost_params)) params$mlpost <- mlpost_params # Extract labels from column names in dataframe labels <- get.labels(data, verbose) @@ -108,7 +110,7 @@ gmjmcmc <- function ( # Initialize first model of population model.cur <- as.logical(rbinom(n = length(S[[p]]), size = 1, prob = 0.5)) - model.cur.res <- loglik.pre(loglik.pi, model.cur, complex, data.t, params$loglik, NULL, FALSE) + model.cur.res <- loglik.pre(loglik.pi, model.cur, complex, data.t, params$mlpost, NULL, FALSE) model.cur <- list(prob = 0, model = model.cur, coefs = model.cur.res$coefs, crit = model.cur.res$crit, alpha = 0) best.crit <- model.cur$crit # Reset first best criteria value @@ -187,8 +189,7 @@ gmjmcmc <- function ( gmjmcmc.transition <- function (S.t, F.0, data, loglik.alpha, marg.probs.F.0, marg.probs, labels, probs, params, verbose = TRUE) { # Sample which features to keep based on marginal inclusion below probs$filter feats.keep <- as.logical(rbinom(n = length(marg.probs), size = 1, prob = pmin(marg.probs / probs$filter, 1))) - - + # Always keep original covariates if that setting is on if (params$keep.org) { if (params$prel.filter > 0) { @@ -198,24 +199,21 @@ gmjmcmc.transition <- function (S.t, F.0, data, loglik.alpha, marg.probs.F.0, ma else feats.keep[seq_along(F.0)] <- T } - # Avoid removing too many features if (length(feats.keep) > 0 && mean(feats.keep) < params$keep.min & sum(feats.keep) < params$pop.max/2) { feats.add.n <- round((params$keep.min - mean(feats.keep)) * length(feats.keep)) feats.add <- sample(which(!feats.keep), feats.add.n) - if((length(feats.add) + sum(feats.keep))>=params$pop.max) + if ((length(feats.add) + sum(feats.keep)) >= params$pop.max) feats.keep[feats.add] <- T } - if(sum(feats.keep)>params$pop.max) - { + if (sum(feats.keep)>params$pop.max) { warning("Number of features to keep greater than pop.max! Continue with first pop.max features to be kept! \n Ignore if the final set of features with high probabilities is smaller than the specified $feat$pop.max \n Otherwise check your tuning parameters and increase $feat$pop.max or probs$filter!") feats.keep[which(feats.keep==TRUE)[(params$pop.max+1):length(which(feats.keep==TRUE))]] <- FALSE } - # Create a list of which features to replace feats.replace <- which(!feats.keep) @@ -225,18 +223,15 @@ gmjmcmc.transition <- function (S.t, F.0, data, loglik.alpha, marg.probs.F.0, ma marg.probs.use <- c(rep(params$eps, length(F.0)), pmin(pmax(marg.probs, params$eps), (1-params$eps))) # Perform the replacements - if(length(S.t)>params$pop.max) + if (length(S.t) > params$pop.max) feats.replace <- sort(feats.replace,decreasing = T) for (i in feats.replace) { prev.size <- length(S.t) - prev.feat.string <- print.feature(S.t[[i]], labels=labels, round = 2) - if(prev.size>params$pop.max) - { + prev.feat.string <- print.feature(S.t[[i]], labels = labels, round = 2) + if (prev.size > params$pop.max) { cat("Removed feature", prev.feat.string, "\n") S.t[[i]] <- NULL - } - else - { + } else { S.t[[i]] <- gen.feature(c(F.0, S.t), marg.probs.use, data, loglik.alpha, probs, length(F.0), params, verbose) if (prev.size > length(S.t)) { if (verbose) { diff --git a/R/gmjmcmc_support.R b/R/gmjmcmc_support.R index 70aa584..21c4fe0 100644 --- a/R/gmjmcmc_support.R +++ b/R/gmjmcmc_support.R @@ -152,7 +152,7 @@ check.data <- function (x, y, fixed, verbose) { # Function to extract column names if they are well formed get.labels <- function (data, verbose) { - labels <- colnames(data$x)[seq.int(from = data$fixed + 1, ncol(data$x))] + labels <- colnames(data$x) if (is.null(labels)) return(FALSE) if (sum(is.na(labels)) != 0) { if (verbose) cat("NA labels present, using x#\n") diff --git a/R/likelihoods.R b/R/likelihoods.R index 2016b5f..0da9068 100644 --- a/R/likelihoods.R +++ b/R/likelihoods.R @@ -254,9 +254,7 @@ gaussian.loglik <- function (y, x, model, complex, params) { #' gaussian.loglik.g(rnorm(100), matrix(rnorm(100)), TRUE, list(oc=1)) #' #' @export gaussian.loglik.g -gaussian.loglik.g <- function (y, x, model, complex, params = NULL) -{ - +gaussian.loglik.g <- function (y, x, model, complex, params = NULL) { suppressWarnings({ mod <- fastglm(as.matrix(x[, model]), y, family = gaussian()) }) @@ -267,8 +265,7 @@ gaussian.loglik.g <- function (y, x, model, complex, params = NULL) RSS <- sum(mod$residuals^2) Rsquare <- 1 - (RSS / TSS) - if (length(params$r) == 0) - { + if (length(params$r) == 0) { params$r <- 1/dim(x)[1] params$g <- max(mod$rank^2,length(y)) } @@ -277,7 +274,7 @@ gaussian.loglik.g <- function (y, x, model, complex, params = NULL) mloglik <- 0.5*(log(1.0 + params$g) * (dim(x)[1] - mod$rank) - log(1.0 + params$g * (1.0 - Rsquare)) * (dim(x)[1] - 1))*(mod$rank!=1) # logarithm of model prior - # default value or parameter r + # default value or parameter r lp <- log_prior(params, complex) return(list(crit = mloglik + lp, coefs = mod$coefficients)) diff --git a/R/local_optim.R b/R/local_optim.R index ab11489..a198329 100644 --- a/R/local_optim.R +++ b/R/local_optim.R @@ -80,10 +80,10 @@ greedy.optim <- function (model, data, loglik.pi, indices, complex, params, logl local.optim <- function (model, data, loglik.pi, indices, complex, type, params, kernel = NULL, visited.models = NULL, sub = FALSE) { if (type == 1) { - return(simulated.annealing(model, data, loglik.pi, indices, complex, params$sa, params$loglik, kernel, visited.models = visited.models, sub = sub)) + return(simulated.annealing(model, data, loglik.pi, indices, complex, params$sa, params$mlpost, kernel, visited.models = visited.models, sub = sub)) } if (type == 2) { - return(greedy.optim(model, data, loglik.pi, indices, complex, params$greedy, params$loglik, kernel, visited.models = visited.models, sub = sub)) + return(greedy.optim(model, data, loglik.pi, indices, complex, params$greedy, params$mlpost, kernel, visited.models = visited.models, sub = sub)) } if (type == 3) { return("not implemented") diff --git a/R/mjmcmc.R b/R/mjmcmc.R index 445ea95..199261b 100644 --- a/R/mjmcmc.R +++ b/R/mjmcmc.R @@ -31,7 +31,7 @@ #' plot(result) #' #' @export mjmcmc -mjmcmc <- function (x, y, loglik.pi = gaussian.loglik, fixed = 0, N = 100, probs = NULL, params = NULL, sub = FALSE, verbose = TRUE) { +mjmcmc <- function (x, y, loglik.pi = gaussian.loglik, mlpost_params = NULL, fixed = 0, N = 100, probs = NULL, params = NULL, sub = FALSE, verbose = TRUE) { # Verify that data is well-formed labels <- names(x) data <- check.data(x, y, fixed, verbose) @@ -39,6 +39,7 @@ mjmcmc <- function (x, y, loglik.pi = gaussian.loglik, fixed = 0, N = 100, probs # Generate default probabilities and parameters if there are none supplied. if (is.null(probs)) probs <- gen.probs.mjmcmc() if (is.null(params)) params <- gen.params.mjmcmc(data) + if (!is.null(mlpost_params)) params$mlpost <- mlpost_params # Acceptance probability accept <- 0 @@ -49,7 +50,7 @@ mjmcmc <- function (x, y, loglik.pi = gaussian.loglik, fixed = 0, N = 100, probs # Initialize first model model.cur <- as.logical(rbinom(n = length(S), size = 1, prob = 0.5)) - model.cur.res <- loglik.pre(loglik.pi, model.cur, complex, data, params$loglik, visited.models = NULL, sub = sub) + model.cur.res <- loglik.pre(loglik.pi, model.cur, complex, data, params$mlpost, visited.models = NULL, sub = sub) model.cur <- list(prob = 0, model = model.cur, coefs = model.cur.res$coefs, crit = model.cur.res$crit, alpha = 0) if (verbose) cat("\nMJMCMC begin.\n") @@ -214,7 +215,7 @@ mjmcmc.prop <- function (data, loglik.pi, model.cur, complex, pip_estimate, prob model.cur$prob <- prob.proposal(proposal$model, model.cur$model, q.g, params$mh, pip_estimate) } # Calculate log likelihoods for the proposed model - proposal.res <- loglik.pre(loglik.pi, proposal$model, complex, data, params$loglik, visited.models=visited.models, sub = sub) + proposal.res <- loglik.pre(loglik.pi, proposal$model, complex, data, params$mlpost, visited.models=visited.models, sub = sub) proposal$crit <- proposal.res$crit # Calculate acceptance probability for proposed model diff --git a/R/parallel.R b/R/parallel.R index 3c61b0c..bb8d7cc 100644 --- a/R/parallel.R +++ b/R/parallel.R @@ -126,9 +126,20 @@ mjmcmc.parallel <- function(runs = 2, cores = getOption("mc.cores", 2L), ...) { #' plot(result) #' #' @export -gmjmcmc.parallel <- function(runs = 2, cores = getOption("mc.cores", 2L), merge.options = list(populations = "best", complex.measure = 2, tol = 0.0000001), x, y, loglik.pi = gaussian.loglik, loglik.alpha = gaussian.loglik.alpha, transforms, ...) { +gmjmcmc.parallel <- function( + runs = 2, + cores = getOption("mc.cores", 2L), + merge.options = list(populations = "best", complex.measure = 2, tol = 0.0000001), + x, + y, + loglik.pi = gaussian.loglik, + loglik.alpha = gaussian.loglik.alpha, + mlpost_params = NULL, + transforms, + ... +) { options("gmjmcmc-transformations" = transforms) - results <- rmclapply(seq_len(runs), args = list(x = x, y = y, loglik.pi = loglik.pi, loglik.alpha = loglik.alpha, transforms = transforms, ...), mc.cores = cores, fun = gmjmcmc) + results <- rmclapply(seq_len(runs), args = list(x = x, y = y, loglik.pi = loglik.pi, loglik.alpha = loglik.alpha, mlpost_params = mlpost_params, transforms = transforms, ...), mc.cores = cores, fun = gmjmcmc) class(results) <- "gmjmcmc_parallel" merged <- merge_results(results, merge.options$populations, merge.options$complex.measure, merge.options$tol, data = list(x = x, y = y)) gc() diff --git a/R/predict.R b/R/predict.R index f375a5d..0c71949 100644 --- a/R/predict.R +++ b/R/predict.R @@ -122,7 +122,7 @@ predict.gmjmcmc.2 <- function (object, x, link = function(x) x, quantiles = c(0. #' preds <- predict(result, matrix(rnorm(600), 100)) #' #' @export -predict.gmjmcmc_merged <- function (object, x, link = function(x) x, quantiles = c(0.025, 0.5, 0.975), pop = NULL,tol = 0.0000001, ...) { +predict.gmjmcmc_merged <- function (object, x, link = function(x) x, quantiles = c(0.025, 0.5, 0.975), pop = NULL, tol = 0.0000001, ...) { x <- impute_x(object, x) transforms.bak <- set.transforms(object$transforms) @@ -141,7 +141,7 @@ predict.gmjmcmc_merged <- function (object, x, link = function(x) x, quantiles = # Precalculate the features for the new data x.precalc <- precalc.features(list(x = x, fixed = object$fixed), features)$x - yhat <- matrix(0, nrow=nrow(x), ncol=length(models)) + yhat <- matrix(0, nrow = nrow(x), ncol = length(models)) for (k in seq_along(models)) { # Models which have 0 weight are skipped since they may also be invalid, and would not influence the predictions. if (models[[k]]$crit == -.Machine$double.xmax) next diff --git a/tests_current/Ex10_Sec6_2.R b/tests_current/Ex10_Sec6_2.R index 53e83a8..70d0b62 100644 --- a/tests_current/Ex10_Sec6_2.R +++ b/tests_current/Ex10_Sec6_2.R @@ -42,11 +42,11 @@ probs$gen <- c(1,1,0,1) # Modifications and interactions! params <- gen.params.gmjmcmc(df) params$feat$D <- 1 # Set depth of features to 1 (still allows for interactions) -params$loglik$r = 1/dim(df)[1] +params$mlpost$r = 1/dim(df)[1] params$feat$pop.max = 10 #specify indices for a random effect -params$loglik$dr = droplevels(Zambia$dr) # district ids for repeated measurements +params$mlpost$dr = droplevels(Zambia$dr) # district ids for repeated measurements #estimator function with lme4 @@ -87,7 +87,7 @@ mixed.model.loglik.lme4 <- function (y, x, model, complex, params) #estimator function with INLA -params$loglik$INLA.num.threads = 10 # Number of threads used by INLA +params$mlpost$INLA.num.threads = 10 # Number of threads used by INLA #params$feat$keep.min = 0.2 @@ -135,7 +135,7 @@ mixed.model.loglik.inla <- function (y, x, model, complex, params) #estimator function with RTMB -params$loglik$nr_dr = sum((table(Zambia$dr))>0) #number of districts (that is number of different random intercepts) +params$mlpost$nr_dr = sum((table(Zambia$dr))>0) #number of districts (that is number of different random intercepts) mixed.model.loglik.rtmb <- function (y, x, model, complex, params) { @@ -288,7 +288,7 @@ summary(result2c, labels = names(df)[-1]) set.seed(22052024) -params$loglik$INLA.num.threads = 1 # Number of threads used by INLA set to 1 +params$mlpost$INLA.num.threads = 1 # Number of threads used by INLA set to 1 result2a <- gmjmcmc.parallel(runs = 20, cores = 20, data = df, loglik.pi = mixed.model.loglik.inla, transforms = transforms, N.init=30, probs = probs, params = params, P = 25) plot(result2a) @@ -300,7 +300,7 @@ summary(result2a, labels = names(df)[-1]) params$feat$check.col = F set.seed(20062024) -params$loglik$INLA.num.threads = 1 # Number of threads used by INLA set to 1 +params$mlpost$INLA.num.threads = 1 # Number of threads used by INLA set to 1 result2b <- gmjmcmc.parallel(runs = 100, cores = 20, data = df, loglik.pi = mixed.model.loglik.inla, transforms = transforms, N.init=16, probs = probs, params = params, P = 15) summary(result2b, labels = names(df)[-1]) diff --git a/tests_current/Ex11_Sec6_3.R b/tests_current/Ex11_Sec6_3.R index 113b395..771ef79 100644 --- a/tests_current/Ex11_Sec6_3.R +++ b/tests_current/Ex11_Sec6_3.R @@ -37,11 +37,11 @@ probs$gen <- c(1,1,0,1) # Only modifications! params <- gen.params.gmjmcmc(df) params$feat$D <- 2 # Set depth of features to 2 (allow for interactions) -params$loglik$r = 1/dim(df)[1] +params$mlpost$r = 1/dim(df)[1] #specify indices for a random effect -params$loglik$PID = data$Ind # patient ids for repeated measurements -params$loglik$INLA.num.threads = 10 # Number of threads used by INLA +params$mlpost$PID = data$Ind # patient ids for repeated measurements +params$mlpost$INLA.num.threads = 10 # Number of threads used by INLA params$feat$keep.min = 0.2 @@ -113,7 +113,7 @@ summary(result) set.seed(23052024) tic() -params$loglik$INLA.num.threads = 1 # Number of threads used by INLA set to 1 +params$mlpost$INLA.num.threads = 1 # Number of threads used by INLA set to 1 if (use.fbms) { result2 <- fbms(data = df, family = "custom", loglik.pi = poisson.loglik.inla, method = "gmjmcmc.parallel", runs = 40, cores = 40, diff --git a/tests_current/Ex12_Sec6_4.R b/tests_current/Ex12_Sec6_4.R index 884f1d3..3eb5e4b 100644 --- a/tests_current/Ex12_Sec6_4.R +++ b/tests_current/Ex12_Sec6_4.R @@ -37,8 +37,8 @@ n = dim(df)[1] p = dim(df)[2] - 1 params <- gen.params.gmjmcmc(data = df) -params$loglik$r = 0.5 -params$loglik$subs = 0.01 +params$mlpost$r = 0.5 +params$mlpost$subs = 0.01 transforms <- c("sigmoid","pm1","p0","p05","p2","p3") diff --git a/tests_current/Ex13_Sec6_5.R b/tests_current/Ex13_Sec6_5.R index ac47c5d..c441264 100644 --- a/tests_current/Ex13_Sec6_5.R +++ b/tests_current/Ex13_Sec6_5.R @@ -44,8 +44,8 @@ time <- df.train$time params <- gen.params.gmjmcmc(data = df.train[,-1]) -params$loglik$r = 0.5 -params$loglik$time = time #the time variable goes into the params structure +params$mlpost$r = 0.5 +params$mlpost$time = time #the time variable goes into the params structure params$feat$keep.min = 0.2 diff --git a/tests_current/Ex1_Sec3.R b/tests_current/Ex1_Sec3.R index ad7a999..9b3e6c2 100644 --- a/tests_current/Ex1_Sec3.R +++ b/tests_current/Ex1_Sec3.R @@ -22,6 +22,8 @@ summary(data) te.ind <- 540:939 df.train = data[-te.ind,] df.test = data[te.ind,] +x.train <- cbind(1, df.train[, -1]) +y.train <- df.train[, 1] to3 <- function(x) x^3 transforms <- c("sigmoid","sin_deg","exp_dbl","p0","troot","to3") @@ -37,14 +39,14 @@ use.fbms = FALSE #################################################### params <- gen.params.gmjmcmc(df.train) -params$loglik$var <- "unknown" +params$mlpost$var <- "unknown" if (use.fbms) { result.default <- fbms(formula = semimajoraxis ~ 1 + . , data = df.train, method = "gmjmcmc", transforms = transforms, params = params) } else { - result.default <- gmjmcmc(df.train, transforms = transforms, params = params) + result.default <- gmjmcmc(x.train, y.train, fixed = 1, transforms = transforms, params = params) } -summary(result.default,labels = F) +summary(result.default, labels = FALSE) preds <- predict(result.default, df.test[,-1], link = function(x) x) @@ -72,7 +74,7 @@ if (use.fbms) { result.P50 <- fbms(data = df.train, method = "gmjmcmc", transforms = transforms, P=50, N.init=1000, N.final=1000, params = params) } else { - result.P50 <- gmjmcmc(df.train, transforms = transforms, + result.P50 <- gmjmcmc(cbind(1, df.train[, -1]), df.train[, 1], fixed = 1, transforms = transforms, P=50, N.init=1000, N.final=1000, params = params) } summary(result.P50, labels = names(df.train)[-1]) @@ -88,14 +90,14 @@ if (use.fbms) { result_parallel <- fbms(data = df.train, method = "gmjmcmc.parallel", transforms = transforms, runs = 40, cores = 10, P=25,params = params) } else { - result_parallel <- gmjmcmc.parallel(runs = 40, cores = 10, data = df.train, loglik.pi = gaussian.loglik, - transforms = transforms, P=25,params = params) + result_parallel <- gmjmcmc.parallel(runs = 40, cores = 10, x = cbind(1, df.train[, -1]), y = df.train[, 1], fixed = 1, loglik.pi = gaussian.loglik, + transforms = transforms, P = 25, params = params) } summary(result_parallel, tol = 0.01) ####### fixed variance -params$loglik$var <- 1 +params$mlpost$var <- 1 set.seed(124) if (use.fbms) { result_parallel_unitphi <- fbms(data = df.train, method = "gmjmcmc.parallel", transforms = transforms, @@ -133,9 +135,9 @@ gaussian.loglik.g <- function (y, x, model, complex, params) #default for N.final = N.init -params$loglik$betaprior <- "hyper-g-n" -params$loglik$r <- 1/dim(df.train)[1] -params$loglik$alpha <- dim(df.train)[1] +params$mlpost$betaprior <- "hyper-g-n" +params$mlpost$r <- 1/dim(df.train)[1] +params$mlpost$alpha <- dim(df.train)[1] set.seed(1) if (use.fbms) { result_parallel_g <- fbms(data = df.train,family = "custom", method = "gmjmcmc.parallel",loglik.pi = lm.logpost.bas, transforms = transforms, @@ -282,7 +284,7 @@ for(prior in c("g-prior", "JZS")) { print(paste0("testing ",prior)) - params$loglik <- list(r = 1/dim(df.train)[1], betaprior = prior,alpha = max(dim(df.train)[1],(dim(df.train)[2])^2)) + params$mlpost <- list(r = 1/dim(df.train)[1], betaprior = prior,alpha = max(dim(df.train)[1],(dim(df.train)[2])^2)) #ours are stil a bit faster than the BAS ones, but BAS are relatively fine too @@ -299,7 +301,7 @@ for(prior in c("g-prior", #default for N.final = N.init params <- gen.params.gmjmcmc(df.train) -params$loglik$g <- dim(df.train)[1] +params$mlpost$g <- dim(df.train)[1] tic() result.default <- fbms(formula = semimajoraxis ~ 1 + . , data = df.train, method = "gmjmcmc.parallel",cores = 10, runs = 10, transforms = transforms, loglik.pi = gaussian.loglik.g, params = params, P = 50) time.res = toc() diff --git a/tests_current/Ex3_Sec4_1.R b/tests_current/Ex3_Sec4_1.R index 6089c74..d08c144 100644 --- a/tests_current/Ex3_Sec4_1.R +++ b/tests_current/Ex3_Sec4_1.R @@ -44,7 +44,7 @@ params$feat$pop.max <- 50 # #################################################### n = dim(df)[1]; p=dim(df)[2] -params$loglik$g <- max(n,p^2) # Using recommendation from Fernandez et al (2001) +params$mlpost$g <- max(n,p^2) # Using recommendation from Fernandez et al (2001) #this will be added to the package log_prior <- function(params,complex){ diff --git a/tests_current/Ex3_Sec4_1_Removed, replaced_by_Ex7.R b/tests_current/Ex3_Sec4_1_Removed, replaced_by_Ex7.R index 35c98f3..558347b 100644 --- a/tests_current/Ex3_Sec4_1_Removed, replaced_by_Ex7.R +++ b/tests_current/Ex3_Sec4_1_Removed, replaced_by_Ex7.R @@ -41,8 +41,8 @@ transforms = c("") probs = gen.probs.gmjmcmc(transforms) probs$gen = c(0,0,0,1) probs$filter=0.8 -params$loglik$var = "unknown" -#params$loglik$r = 1 +params$mlpost$var = "unknown" +#params$mlpost$r = 1 set.seed(123) start.time=Sys.time() @@ -112,7 +112,7 @@ params = gen.params.gmjmcmc(df[,ids3]) params$feat$check.col <- F params$feat$pop.max = 60 params$prel.select <- ids3 -params$loglik$var <- "unknown" +params$mlpost$var <- "unknown" probs = gen.probs.gmjmcmc(transforms) probs$gen = c(0,0,0,1) diff --git a/tests_current/Ex4_Sec4_2.R b/tests_current/Ex4_Sec4_2.R index 9436204..b9e2d20 100644 --- a/tests_current/Ex4_Sec4_2.R +++ b/tests_current/Ex4_Sec4_2.R @@ -36,7 +36,7 @@ df <- as.data.frame(cbind(y, X)) transforms <- c("") params <- gen.params.gmjmcmc(df) -#params$loglik$var = "unknown" #this will set the variance to unknwon +#params$mlpost$var = "unknown" #this will set the variance to unknwon probs <- gen.probs.gmjmcmc(transforms) probs$gen <- c(1,0,0,1) #Include interactions and mutations diff --git a/tests_current/Ex5_Sec4_3.R b/tests_current/Ex5_Sec4_3.R index e9bfe5f..0124956 100644 --- a/tests_current/Ex5_Sec4_3.R +++ b/tests_current/Ex5_Sec4_3.R @@ -44,7 +44,7 @@ params <- gen.params.gmjmcmc(df) params$feat$D <- 1 # Set depth of features to 1 #to set variance to unknown uncomment below -#params$loglik$var <- "unknown" +#params$mlpost$var <- "unknown" #################################################### # diff --git a/tests_current/Ex6_Sec4_4.R b/tests_current/Ex6_Sec4_4.R index 76071ca..034c0cc 100644 --- a/tests_current/Ex6_Sec4_4.R +++ b/tests_current/Ex6_Sec4_4.R @@ -66,8 +66,8 @@ probs <- gen.probs.gmjmcmc(transforms) probs$gen <- c(0,0,1,1) #Only projections! params <- gen.params.gmjmcmc(df.training) -#params$loglik$r = 0.9 -#params$loglik$var = "unknown" +#params$mlpost$r = 0.9 +#params$mlpost$var = "unknown" ############################################################################# diff --git a/tests_current/Ex8_Sec5.2_logic_regression.R b/tests_current/Ex8_Sec5.2_logic_regression.R index 95bb2ce..699e087 100644 --- a/tests_current/Ex8_Sec5.2_logic_regression.R +++ b/tests_current/Ex8_Sec5.2_logic_regression.R @@ -45,7 +45,7 @@ probs$gen <- c(1,1,0,1) #No projections allowed params <- gen.params.gmjmcmc(df.training) -params$loglik$p <- 50 #number of leaves +params$mlpost$p <- 50 #number of leaves params$feat$pop.max <- 31 params$feat$L <- 15 ############################################## @@ -103,7 +103,7 @@ if (use.fbms) { } summary(result) -mpm <- get.mpm.model(result,y = df.training$Y2,x = df.training[,-1],family = "custom", loglik.pi = estimate.logic.lm,params = params$loglik) +mpm <- get.mpm.model(result,y = df.training$Y2,x = df.training[,-1],family = "custom", loglik.pi = estimate.logic.lm,params = params$mlpost) mbest <- get.best.model(result) @@ -152,7 +152,7 @@ if (use.fbms) { transforms = transforms, probs = probs, params = params, P=25) } summary(result_parallel) -mpm <- get.mpm.model(result_parallel,y = df.training$Y2,x = df.training[,-1],family = "custom", loglik.pi = estimate.logic.lm,params = params$loglik) +mpm <- get.mpm.model(result_parallel,y = df.training$Y2,x = df.training[,-1],family = "custom", loglik.pi = estimate.logic.lm,params = params$mlpost) mbest <- get.best.model(result_parallel) @@ -193,13 +193,13 @@ probs$gen <- c(1,1,0,1) #No projections allowed probs$filter <- 0.6 params <- gen.params.gmjmcmc(df.training) -params$loglik$p <- 50 #number of leaves -params$loglik$n <- n #used in specifying parameter v of the tCCH prior -params$loglik$p.a <- 1 -params$loglik$p.b <- 1 -params$loglik$p.r <- 1.5 -params$loglik$p.s <- 0 -params$loglik$p.k <- 1 +params$mlpost$p <- 50 #number of leaves +params$mlpost$n <- n #used in specifying parameter v of the tCCH prior +params$mlpost$p.a <- 1 +params$mlpost$p.b <- 1 +params$mlpost$p.r <- 1.5 +params$mlpost$p.s <- 0 +params$mlpost$p.k <- 1 params$feat$pop.max <- 31 library(BAS) #needed for hypergeometric functions @@ -253,7 +253,7 @@ if (use.fbms) { } summary(result) -mpm <- get.mpm.model(result,y = df.training$Y2,x = df.training[,-1],family = "custom", loglik.pi = estimate.logic.lm,params = params$loglik) +mpm <- get.mpm.model(result,y = df.training$Y2,x = df.training[,-1],family = "custom", loglik.pi = estimate.logic.lm,params = params$mlpost) mbest <- get.best.model(result) @@ -295,7 +295,7 @@ if (use.fbms) { transforms = transforms, probs = probs, params = params, P=25) } summary(result_parallel) -mpm <- get.mpm.model(result_parallel,y = df.training$Y2,x = df.training[,-1],family = "custom", loglik.pi = estimate.logic.lm,params = params$loglik) +mpm <- get.mpm.model(result_parallel,y = df.training$Y2,x = df.training[,-1],family = "custom", loglik.pi = estimate.logic.lm,params = params$mlpost) mbest <- get.best.model(result_parallel) diff --git a/tests_current/Ex8_Sec5_2.R b/tests_current/Ex8_Sec5_2.R index 9e4c15a..56768ef 100644 --- a/tests_current/Ex8_Sec5_2.R +++ b/tests_current/Ex8_Sec5_2.R @@ -67,7 +67,7 @@ probs <- gen.probs.gmjmcmc(transforms) probs$gen <- c(0,0,1,1) #Only projections! params <- gen.params.gmjmcmc(df.training) -params$loglik$r = 0.2 +params$mlpost$r = 0.2 ############################################## # diff --git a/tests_current/Ex9_Sec6_1.R b/tests_current/Ex9_Sec6_1.R index ac0e6f7..b64b43a 100644 --- a/tests_current/Ex9_Sec6_1.R +++ b/tests_current/Ex9_Sec6_1.R @@ -55,7 +55,7 @@ set.seed(6001) -params$loglik = list(r = 1/dim(df)[1], family = "binomial", betaprior = g.prior(100), laplace = F) +params$mlpost = list(r = 1/dim(df)[1], family = "binomial", betaprior = g.prior(100), laplace = F) result <- fbms(data = df, method = "gmjmcmc", family = "custom", transforms = transforms, probs = probs, loglik.pi = glm.logpost.bas, params = params,P=3) @@ -84,7 +84,7 @@ if (use.fbms) { } # Default tuning parameters for logistic regression: # -# params$loglik$r = 1/n +# params$mlpost$r = 1/n summary(result) -- GitLab From db4a73a697dad0bfa09eaa4cf5af5f1bc9371090 Mon Sep 17 00:00:00 2001 From: jonlachmann Date: Mon, 31 Mar 2025 17:51:07 +0200 Subject: [PATCH 04/36] * New testing framework in place - intercept and fixed verified in most locations. --- R/alpha_generation.R | 4 +- R/fbms.R | 29 +- R/feature.R | 8 +- R/feature_generation.R | 2 +- R/gmjmcmc.R | 6 + R/gmjmcmc_support.R | 3 +- R/likelihoods.R | 513 ++++++++++++++++++------------ R/mjmcmc.R | 19 +- R/parallel.R | 5 +- R/predict.R | 25 +- R/results.R | 58 ++-- R/summary.R | 30 +- tests/testthat/test_fbms.R | 96 ++++++ tests/testthat/test_local_optim.R | 2 +- tests/testthat/test_mjmcmc.R | 115 ++++--- 15 files changed, 594 insertions(+), 321 deletions(-) create mode 100644 tests/testthat/test_fbms.R diff --git a/R/alpha_generation.R b/R/alpha_generation.R index c3511dd..19ae72a 100644 --- a/R/alpha_generation.R +++ b/R/alpha_generation.R @@ -39,7 +39,7 @@ alpha_2 <- function (feature) { #' alpha_3 <- function (feature, data, loglik, verbose) { # Create the string representation of the feature with variable alphas - featfun <- print.feature(feature, dataset = TRUE, alphas = TRUE) + featfun <- print.feature(feature, dataset = TRUE, fixed = data$fixed, alphas = TRUE) featfun <- set_alphas(featfun) # Return if there are no alphas to set if (featfun$count == 0) return(feature) @@ -73,7 +73,7 @@ alpha_3 <- function (feature, data, loglik, verbose) { #' alpha_4 <- function (feature) { # Create the string representation of the feature with variable alphas - featfun <- print.feature(feature, dataset = TRUE, alphas = TRUE) + featfun <- print.feature(feature, dataset = TRUE, fixed = data$fixed, alphas = TRUE) featfun <- set_alphas(featfun) feature <- update.alphas(feature, rnorm(featfun$count,0,1)) diff --git a/R/fbms.R b/R/fbms.R index 6fb907e..0f92fd3 100644 --- a/R/fbms.R +++ b/R/fbms.R @@ -33,12 +33,12 @@ #' #' @seealso \code{\link{mjmcmc}}, \code{\link{gmjmcmc}}, \code{\link{gmjmcmc.parallel}} #' @export -fbms <- function(formula = NULL, family = "gaussian", - beta_prior = list(type = "g-prior", g = 5), - model_prior = list(r = exp(-0.5)), - data = NULL, impute = FALSE, - loglik.pi = gaussian.loglik, - method = "mjmcmc", verbose = TRUE, ...) { +fbms <- function (formula = NULL, family = "gaussian", + beta_prior = list(type = "g-prior", g = 5), + model_prior = list(r = exp(-0.5)), + data = NULL, impute = FALSE, + loglik.pi = gaussian.loglik, + method = "mjmcmc", verbose = TRUE, ...) { if (is.list(beta_prior) || is.list(model_prior)) { mlpost_params <- gen.mlpost.params(beta_prior, model_prior) loglik.pi <- select.mlpost.fun(beta_prior, model_prior, family) @@ -56,7 +56,7 @@ fbms <- function(formula = NULL, family = "gaussian", } na.opt <- getOption("na.action") - if(impute) + if (impute) options(na.action='na.pass') else options(na.action='na.omit') @@ -69,9 +69,9 @@ fbms <- function(formula = NULL, family = "gaussian", Y <- model.response(mf, "any") - X <- model.matrix(formula, data = data)[, -1] - intercept <- attr(terms(formula, data = data), "intercept") - if (intercept) X <- cbind(1, X) + X <- model.matrix(formula, data = data) + intercept <- attr(terms(formula, data = data), "intercept") == 1 + if (intercept) X <- X[, -1, drop = FALSE] mis.Y <- which(is.na(Y)) if (length(mis.Y) > 0) { warning("Missing values in the response. Dropped.") @@ -101,7 +101,6 @@ fbms <- function(formula = NULL, family = "gaussian", } } else { Y <- data[, 1] - X <- cbind(1, data[, -1]) intercept <- TRUE imputed <- NULL na.opt <- getOption("na.action") @@ -114,13 +113,13 @@ fbms <- function(formula = NULL, family = "gaussian", } if (method == "mjmcmc.parallel") - res <- mjmcmc.parallel(X, Y, loglik.pi, mlpost_params = mlpost_params, fixed = intercept, verbose = verbose, ...) + res <- mjmcmc.parallel(x = X, y = Y, loglik.pi = loglik.pi, mlpost_params = mlpost_params, intercept = intercept, verbose = verbose, ...) else if (method == "mjmcmc") - res <- mjmcmc(X, Y, loglik.pi, mlpost_params = mlpost_params, fixed = intercept, verbose = verbose, ...) + res <- mjmcmc(x = X, y = Y, loglik.pi = loglik.pi, mlpost_params = mlpost_params, intercept = intercept, verbose = verbose, ...) else if (method == "gmjmcmc.parallel") - res <- gmjmcmc.parallel(x = X, y = Y, loglik.pi = loglik.pi, mlpost_params = mlpost_params, fixed = intercept, verbose = verbose,...) + res <- gmjmcmc.parallel(x = X, y = Y, loglik.pi = loglik.pi, mlpost_params = mlpost_params, intercept = intercept, verbose = verbose,...) else if (method == "gmjmcmc") - res <- gmjmcmc(X, Y, loglik.pi, mlpost_params = mlpost_params, fixed = intercept, verbose = verbose, ...) + res <- gmjmcmc(x = X, y = Y, loglik.pi = loglik.pi, mlpost_params = mlpost_params, intercept = intercept, verbose = verbose, ...) else stop("Error: Method must be one of gmjmcmc, gmjmcmc.parallel, mjmcmc or mjmcmc.parallel!") diff --git a/R/feature.R b/R/feature.R index 8226e72..28c2439 100644 --- a/R/feature.R +++ b/R/feature.R @@ -109,7 +109,7 @@ update.alphas <- function (feature, alphas, recurse=FALSE) { #' print(result$populations[[1]][1]) #' #' @export -print.feature <- function (x, dataset = FALSE, alphas = FALSE, labels = FALSE, round = FALSE, ...) { +print.feature <- function (x, dataset = FALSE, fixed = 0, alphas = FALSE, labels = FALSE, round = FALSE, ...) { fString <- "" feat <- x[[length(x)]] # This is a more complex feature @@ -145,15 +145,15 @@ print.feature <- function (x, dataset = FALSE, alphas = FALSE, labels = FALSE, r if (alphas) fString <- paste0(fString, "?*") else fString <- paste0(fString, feat[j,3], "*") } - fString <- paste0(fString, print.feature(x[[feat[j, 2]]], dataset, alphas, labels, round), op) + fString <- paste0(fString, print.feature(x[[feat[j, 2]]], dataset, fixed, alphas, labels, round), op) } } fString <- paste0(fString, ")") } # This is a plain covariate else if (is.numeric(feat)) { - if (dataset) fString <- paste0("data$x[,", feat, "]") - else if (labels[1] != F) fString <- labels[feat] + if (dataset) fString <- paste0("data$x[,", fixed + feat, "]") + else if (labels[1] != FALSE) fString <- labels[feat] else fString <- paste0("x", feat) } else stop("Invalid feature structure") return(fString) diff --git a/R/feature_generation.R b/R/feature_generation.R index 1354012..2911f9f 100644 --- a/R/feature_generation.R +++ b/R/feature_generation.R @@ -96,7 +96,7 @@ check.collinearity <- function (proposal, features, F.0.size, data, mock) { # Generate features to represent the covariates, just takes the count needed gen.covariates <- function (data) { features <- list() - for (i in (data$fixed + 1):ncol(data$x)) { + for (i in seq_len(ncol(data$x) - data$fixed)) { features <- c(features, i) class(features[[length(features)]]) <- "feature" } diff --git a/R/gmjmcmc.R b/R/gmjmcmc.R index e57eb04..6b3b8e6 100644 --- a/R/gmjmcmc.R +++ b/R/gmjmcmc.R @@ -52,6 +52,7 @@ gmjmcmc <- function ( loglik.alpha = gaussian.loglik.alpha, mlpost_params = NULL, transforms, + intercept = FALSE, fixed = 0, P = 10, N.init = 100, @@ -62,6 +63,10 @@ gmjmcmc <- function ( verbose = TRUE ) { # Verify that the data is well-formed + if (intercept) { + x <- cbind(1, x) + fixed <- fixed + 1 + } data <- check.data(x, y, fixed, verbose) # Generate default probabilities and parameters if there are none supplied. @@ -164,6 +169,7 @@ gmjmcmc <- function ( ) results$labels <- labels results$fixed <- fixed + results$intercept <- intercept attr(results, "class") <- "gmjmcmc" return(results) } diff --git a/R/gmjmcmc_support.R b/R/gmjmcmc_support.R index 21c4fe0..5e4dcb1 100644 --- a/R/gmjmcmc_support.R +++ b/R/gmjmcmc_support.R @@ -98,7 +98,7 @@ marginal.probs.renorm <- function (models, type = "features") { precalc.features <- function (data, features) { precalc <- matrix(NA, nrow(data$x), length(features)) for (f in seq_along(features)) { - feature_string <- print.feature(features[[f]], dataset = TRUE) + feature_string <- print.feature(features[[f]], dataset = TRUE, fixed = data$fixed) precalc[, f] <- eval(parse(text = feature_string)) } # Replace any -Inf and Inf values caused by under- or overflow @@ -158,5 +158,6 @@ get.labels <- function (data, verbose) { if (verbose) cat("NA labels present, using x#\n") return(FALSE) } + if (data$fixed > 0) labels <- labels[-seq_len(data$fixed)] return(labels) } \ No newline at end of file diff --git a/R/likelihoods.R b/R/likelihoods.R index 0da9068..83c8908 100644 --- a/R/likelihoods.R +++ b/R/likelihoods.R @@ -64,7 +64,7 @@ glm.logpost.bas <- function (y, x, model, complex, params = list(r = exp(-0.5), Rcontrol = glm.control(), Rlaplace = as.integer(params$laplace)) }) - else + else{ suppressWarnings({ mod <- .Call(BAS:::C_glm_deterministic, y = as.numeric(y), X = as.matrix(x[,model]), @@ -74,24 +74,24 @@ glm.logpost.bas <- function (y, x, model, complex, params = list(r = exp(-0.5), Rmodeldim = as.integer(rep(0,ifelse(p==0,2,1))), modelprior = uniform(), betaprior = params$prior_beta, - family = Gamma(), + family = Gamma(), Rcontrol = glm.control(), Rlaplace = as.integer(params$laplace)) - }) + })} }, error = function(e) { # Handle the error by setting result to NULL mod <- NULL # You can also print a message or log the error if needed cat("An error occurred:", conditionMessage(e), "\n") }) - + if(length(mod)==0) { return(list(crit = -.Machine$double.xmax + log(params$r * sum(complex$oc)),coefs = rep(0,p+1))) } - + if(p == 0) - { - ret <- mod$logmarg[2] + log(params$r) * sum(complex$oc) + { + ret <- mod$logmarg[2] + log(params$r) * sum(complex$oc) return(list(crit=ret, coefs=mod$mle[[2]])) } ret <- mod$logmarg + log(params$r) * sum(complex$oc) @@ -107,43 +107,29 @@ glm.logpost.bas <- function (y, x, model, complex, params = list(r = exp(-0.5), #' @param x The matrix containing the precalculated features #' @param model The model to estimate as a logical vector #' @param complex A list of complexity measures for the features -#' @param params A list of parameters for the log likelihood, supplied by the user, important to specify the tuning parameters of beta priors and in Gaussian models +#' @param params A list of parameters for the log likelihood, supplied by the user, important to specify the tuning parameters of beta priors where the corresponding integers as prior_beta must be provided "g-prior" = 0, "hyper-g" = 1, "EB-local" = 2, "BIC" = 3, "ZS-null" = 4, "ZS-full" = 5, "hyper-g-laplace" = 6, "AIC" = 7, "EB-global" = 2, "hyper-g-n" = 8, "JZS" = 9 and in Gaussian models #' #' @return A list with the log marginal likelihood combined with the log prior (crit) and the posterior mode of the coefficients (coefs). #' #' @examples #' lm.logpost.bas(rnorm(100), cbind(1,matrix(rnorm(100))), c(TRUE,TRUE), list(oc = 1)) -#' +#' #' #' @export lm.logpost.bas lm.logpost.bas <- function (y, x, model, complex, params = list(r = exp(-0.5),prior_beta = "g-prior",alpha = 4)) { if (length(params) == 0) - params <- list(r = 1/dim(x)[1], prior_beta = "g-prior",alpha = max(dim(x)[1],sum(model)^2)) - - method.num <- switch( - params$prior_beta, - "g-prior" = 0, - "hyper-g" = 1, - "EB-local" = 2, - "BIC" = 3, - "ZS-null" = 4, - "ZS-full" = 5, - "hyper-g-laplace" = 6, - "AIC" = 7, - "EB-global" = 2, - "hyper-g-n" = 8, - "JZS" = 9 - ) - - p <- sum(model) - 1 + params <- list(r = 1/dim(x)[1], prior_beta = 0,alpha = max(dim(x)[1],sum(model)^2)) + + + p <- sum(model) - 1 if(p==0) - { + { probinit <- as.numeric(c(1,0.99)) model[2] <- T }else{ probinit <- as.numeric(c(1,rep(0.99,p))) } - + mod<-NULL tryCatch({ @@ -154,8 +140,8 @@ lm.logpost.bas <- function (y, x, model, complex, params = list(r = exp(-0.5),pr probinit, as.integer(rep(0,ifelse(p==0,2,1))), incint = as.integer(F), - alpha = as.numeric(params$alpha), - method = as.integer(method.num), + alpha = ifelse(length(params$alpha)>0,as.numeric(params$alpha),NULL), + method = as.integer(params$prior_beta), modelprior = uniform(), Rpivot = TRUE, Rtol = 1e-7) @@ -166,17 +152,17 @@ lm.logpost.bas <- function (y, x, model, complex, params = list(r = exp(-0.5),pr # You can also print a message or log the error if needed cat("An error occurred:", conditionMessage(e), "\n") }) - + if(length(mod)==0) { return(list(crit = -.Machine$double.xmax + log(params$r * sum(complex$oc)),coefs = rep(0,p+1))) } - + if(p == 0) - { - ret <- mod$logmarg[2] + log(params$r) * sum(complex$oc) + { + ret <- mod$logmarg[2] + log(params$r) * sum(complex$oc) return(list(crit=ret, coefs=mod$mle[[2]])) } - ret <- mod$logmarg + log(params$r) * sum(complex$oc) + ret <- mod$logmarg + log(params$r) * sum(complex$oc) return(list(crit=ret, coefs=mod$mle[[1]])) } @@ -195,7 +181,7 @@ lm.logpost.bas <- function (y, x, model, complex, params = list(r = exp(-0.5),pr #' #' @examples #' logistic.loglik(as.integer(rnorm(100) > 0), matrix(rnorm(100)), TRUE, list(oc = 1)) -#' +#' #' #' @export logistic.loglik logistic.loglik <- function (y, x, model, complex, params = list(r = exp(-0.5))) { @@ -206,6 +192,42 @@ logistic.loglik <- function (y, x, model, complex, params = list(r = exp(-0.5))) return(list(crit=ret, coefs=mod$coefficients)) } +#' Log likelihood function for glm regression with a Jeffreys parameter prior and BIC approximations of the posterior +#' This function is created as an example of how to create an estimator that is used +#' to calculate the marginal likelihood of a model. +#' +#' @param y A vector containing the dependent variable +#' @param x The matrix containing the precalculated features +#' @param model The model to estimate as a logical vector +#' @param complex A list of complexity measures for the features +#' @param params A list of parameters for the log likelihood, supplied by the user, family must be specified +#' +#' @return A list with the log marginal likelihood combined with the log prior (crit) and the posterior mode of the coefficients (coefs). +#' +#' @examples +#' glm.loglik(abs(rnorm(100))+1, matrix(rnorm(100)), TRUE, list(oc = 1)) +#' +#' +#' @export glm.loglik +glm.loglik <- function (y, x, model, complex, params = list(r = exp(-0.5),family = "Gamma")) { + if (length(params) == 0) + params <- list(r = 1/dim(x)[1]) + + if(params$family == "binomial") + { + fam = binomial() + }else if(params$family == "poisson"){ + fam = poisson() + }else + { + fam = Gamma() + } + + suppressWarnings({mod <- fastglm(as.matrix(x[, model]), y, family = fam)}) + ret <- (-(mod$deviance + log(length(y)) * (mod$rank - 1) - 2 * log(params$r) * sum(complex$oc))) / 2 + return(list(crit=ret, coefs=mod$coefficients)) +} + #' Log likelihood function for gaussian regression with a Jeffreys prior and BIC approximation of MLIK with both known and unknown variance of the responses #' @@ -219,24 +241,26 @@ logistic.loglik <- function (y, x, model, complex, params = list(r = exp(-0.5))) #' #' @examples #' gaussian.loglik(rnorm(100), matrix(rnorm(100)), TRUE, list(oc = 1), NULL) -#' +#' #' #' @export gaussian.loglik gaussian.loglik <- function (y, x, model, complex, params) { - if(length(params)==0) + if (sum(model) == 0) + return(list(crit = -Inf, coefs = numeric())) + if (length(params) == 0) params <- list() if (length(params$r) == 0) params$r <- 1/dim(x)[1] - if(length(params$var) == 0) + if (length(params$var) == 0) params$var <- 1 suppressWarnings({mod <- fastglm(as.matrix(x[, model]), y, family = gaussian())}) - - if(params$var == "unknown") + + if (params$var == "unknown") ret <- (-(mod$aic + (log(length(y))-2) * (mod$rank) - 2 * log(params$r) * (sum(complex$oc)))) / 2 else - ret <- (-(mod$deviance/params$var + log(length(y)) * (mod$rank - 1) - 2 * log(params$r) * (sum(complex$oc)))) / 2 - - return(list(crit=ret, coefs=mod$coefficients)) + ret <- (-(mod$deviance/params$var + log(length(y)) * (mod$rank - 1) - 2 * log_prior(params, complex))) / 2 + + return(list(crit = ret, coefs = mod$coefficients)) } @@ -254,29 +278,34 @@ gaussian.loglik <- function (y, x, model, complex, params) { #' gaussian.loglik.g(rnorm(100), matrix(rnorm(100)), TRUE, list(oc=1)) #' #' @export gaussian.loglik.g -gaussian.loglik.g <- function (y, x, model, complex, params = NULL) { +gaussian.loglik.g <- function (y, x, model, complex, params = NULL) +{ + if(length(params)==0) + params <- list() + if (length(params$r) == 0) + params$r <- 1/dim(x)[1] suppressWarnings({ mod <- fastglm(as.matrix(x[, model]), y, family = gaussian()) }) - # Calculate R-squared y_mean <- mean(y) TSS <- sum((y - y_mean)^2) RSS <- sum(mod$residuals^2) Rsquare <- 1 - (RSS / TSS) - - if (length(params$r) == 0) { - params$r <- 1/dim(x)[1] + + if (length(params$r) == 0 || length(params$g) == 0) + { + params$r <- 1/dim(x)[1] params$g <- max(mod$rank^2,length(y)) } - + # logarithm of marginal likelihood mloglik <- 0.5*(log(1.0 + params$g) * (dim(x)[1] - mod$rank) - log(1.0 + params$g * (1.0 - Rsquare)) * (dim(x)[1] - 1))*(mod$rank!=1) - + # logarithm of model prior - # default value or parameter r + # default value or parameter r lp <- log_prior(params, complex) - + return(list(crit = mloglik + lp, coefs = mod$coefficients)) } @@ -300,9 +329,9 @@ gaussian.loglik.g <- function (y, x, model, complex, params = NULL) { #' #' @importFrom BAS phi1 hypergeometric1F1 hypergeometric2F1 #' @importFrom tolerance F1 -#' @export -gaussian_tcch_log_likelihood <- function(y, x, model, complex, params = list(r = exp(-0.5), prior_beta = "Intrinsic")) { - +#' @export +gaussian_tcch_log_likelihood <- function(y, x, model, complex, params = list(r = exp(-0.5), prior_beta = "intrinsic")) { + # Fit the linear model using fastglm fitted_model <- fastglm(as.matrix(x[, model]), y, family = gaussian()) log_likelihood <- -(fitted_model$aic -2 * (fitted_model$rank))/2 @@ -311,115 +340,122 @@ gaussian_tcch_log_likelihood <- function(y, x, model, complex, params = list(r = TSS <- sum((y - y_mean)^2) RSS <- sum(fitted_model$residuals^2) R2_M <- 1 - (RSS / TSS) - + p_M <- fitted_model$rank n <- length(y) - + # Switch-like structure to assign hyperparameters based on prior - if (params$prior_beta == "CH") { + if (params$prior_beta[[1]] == "CH") { # CH prior: b and s should be user-specified, with defaults if not provided - a <- ifelse(!is.null(params$a),params$a, 1) # Default to 1 if not specified - b <- ifelse(!is.null(params$b),params$b, 2) # Default to 1 if not specified + a <- ifelse(!is.null(params$prior_beta$a),params$prior_beta$a, 1) # Default to 1 if not specified + b <- ifelse(!is.null(params$prior_beta$b),params$prior_beta$b, 2) # Default to 1 if not specified r <- 0 - s <- ifelse(!is.null(params$s), params$s, 1) # Default to 1 if not specified + s <- ifelse(!is.null(params$prior_beta$s), params$prior_beta$s, 1) # Default to 1 if not specified v <- 1 k <- 1 - - } else if (params$prior_beta == "Hyper-g") { + + } else if (params$prior_beta[[1]] == "hyper-g") { a <- 1 b <- 2 r <- 0 s <- 0 v <- 1 k <- 1 - - } else if (params$prior_beta == "Uniform") { + + } else if (params$prior_beta[[1]] == "uniform") { a <- 2 b <- 2 r <- 0 s <- 0 v <- 1 k <- 1 - - } else if (params$prior_beta == "Jeffreys") { + + } else if (params$prior_beta[[1]] == "Jeffreys") { a <- 0.0001 b <- 2 r <- 0 s <- 0 v <- 1 k <- 1 - } else if (params$prior_beta == "Beta-prime") { + } else if (params$prior_beta[[1]] == "beta.prime") { a <- 1/2 b <- n - p_M - 1.5 r <- 0 s <- 0 v <- 1 k <- 1 - - } else if (params$prior_beta == "Benchmark") { + + } else if (params$prior_beta[[1]] == "benchmark") { a <- 0.02 b <- 0.02 * max(n, p_M^2) r <- 0 s <- 0 v <- 1 k <- 1 - - } else if (params$prior_beta == "TruncGamma") { - - a <- 2 * ifelse(!is.null(params$at),params$at, 1) + + } else if (params$prior_beta[[1]] == "TG") { + + a <- 2 * ifelse(!is.null(params$prior_beta$a),params$prior_beta$a, 1) b <- 2 r <- 0 - s <- 2 * ifelse(!is.null(params$st),params$st, 1) + s <- 2 * ifelse(!is.null(params$prior_beta$s),params$prior_beta$s, 1) v <- 1 k <- 1 - - } else if (params$prior_beta == "ZS adapted") { + + } else if (params$prior_beta[[1]] == "ZS-adapted") { a <- 1 b <- 2 r <- 0 s <- n + 3 v <- 1 k <- 1 - } else if (params$prior_beta == "Robust") { + } else if (params$prior_beta[[1]] == "robust") { a <- 1 b <- 2 r <- 1.5 s <- 0 v <- (n + 1) / (p_M + 1) k <- 1 - - } else if (params$prior_beta == "Hyper-g/n") { + + } else if (params$prior_beta[[1]] == "hyper-g-n") { a <- 1 b <- 2 r <- 1.5 s <- 0 v <- 1 k <- 1 - - } else if (params$prior_beta == "Intrinsic") { + + } else if (params$prior_beta[[1]] == "intrinsic") { a <- 1 b <- 1 r <- 1 s <- 0 v <- (n + p_M + 1) / (p_M + 1) k <- (n + p_M + 1) / n - - } else { + + }else if (params$prior_beta[[1]] == "tCCH") { + a <- params$prior_beta$a + b <- params$prior_beta$b + r <- params$prior_beta$rho + s <- params$prior_beta$s + v <- params$prior_beta$v + k <- params$prior_beta$k + }else { stop("Unknown prior name: ", params$prior_beta) } - + # if (!is.null(r) & r == 0) { - + term1 <- lbeta((a + p_M) / 2, b / 2) term2 <- phi1(b / 2, (n - 1) / 2, (a + b + p_M) / 2, s / (2 * v), min(0.8,R2_M/(v - (v - 1) * R2_M),log = T)) - + if(R2_M/(v - (v - 1) * R2_M)>0.8) { warning("Infinite marginal log likelihood! phi1 last argument reduced to 0.8. Use a different prior_beta (Robust, Hyper-g/n, Intrinsic, or g-prior)") } - - term3 <- lbeta(a / 2, b / 2) + + term3 <- lbeta(a / 2, b / 2) term4 <- hypergeometric1F1(b / 2, (a + b) / 2, s / (2 * v),log = T) marginal_likelihood <- log_likelihood + (term1) + (term2) - (p_M / 2) * log(v) - ((n - 1) / 2)*log(1 - (1 - 1 / v) * R2_M) - (term3) - (term4) } else if (!is.null(s) & s == 0) { @@ -427,15 +463,15 @@ gaussian_tcch_log_likelihood <- function(y, x, model, complex, params = list(r = term2 <- hypergeometric2F1(r, b / 2, (a + b) / 2, 1 - k,log = T) term3 <- F1((a + p_M) / 2, (a + b + p_M + 1 - n - 2 * r) / 2, (n - 1) / 2, (a + b + p_M) / 2, 1 - k, 1 - k - (R2_M^2 * k) / ((1 - R2_M) * v)) marginal_likelihood <- log_likelihood + (a+p_M-2*r)/2*log(k) + (term1) - (term2) - (term3) - (p_M / 2) * log(v) - log(1 - R2_M) * ((n - 1) / 2) - lbeta(a / 2, b / 2) - + } else { stop("Invalid inputs: either r = 0 or s = 0 must be specified.") } - + if (length(params$r) == 0) params$r <- 1/dim(x)[1] # default value or parameter r - + lp <- log_prior(params, complex) - + return(list(crit = marginal_likelihood + lp, coefs = fitted_model$coefficients)) } @@ -455,7 +491,7 @@ gaussian_tcch_log_likelihood <- function(y, x, model, complex, params = list(r = #' #' @examples #' logistic.loglik.ala(as.integer(rnorm(100) > 0), matrix(rnorm(100)), TRUE, list(oc = 1)) -#' +#' #' #' @export logistic.loglik.ala logistic.loglik.ala <- function (y, x, model, complex, params = list(r = exp(-0.5))) { @@ -495,7 +531,7 @@ logistic.loglik.alpha <- function (a, data, mu_func) { #' as a string with the alphas as a\[i\]. #' #' @return A numeric with the log likelihood. -#' @examples +#' @examples #'\dontrun{ #'gaussian.loglik.alpha(my_alpha,my_data,my_mu) #'} @@ -506,20 +542,22 @@ gaussian.loglik.alpha <- function (a, data, mu_func) { } -#' Log model prior function +#' Log model prior function #' @param params list of passed parameters of the likelihood in GMJMCMC -#' @param complex list of complexity measures of the features included into the model -#' +#' @param complex list of complexity measures of the features included into the model +#' #' @return A numeric with the log model prior. -#' +#' #' @examples #' log_prior(params = list(r=2), complex = list(oc = 2)) -#' +#' #' @export log_prior log_prior <- function (params, complex) { pl <- log(params$r) * (sum(complex$oc)) return(pl) } + + #' Master Log Marginal Likelihood Function #' #' This function serves as a unified interface to compute the log marginal likelihood @@ -531,45 +569,36 @@ log_prior <- function (params, complex) { #' @param complex A list of complexity measures for the features. #' @param params A list of parameters controlling the model family, prior, and tuning parameters. #' Key elements include: -#' - family: "binomial", "poisson", "gamma", or "gaussian" (default: "gaussian") +#' - family: "binomial", "poisson", "gamma" (all three referred to as GLM below), or "gaussian" (default: "gaussian") #' - prior_beta: Type of prior as a string (default: "g-prior"). Possible values include: -#' - "beta.prime": Beta-prime prior (GLM, requires `n`) -#' - "bic.prior": BIC-based prior (GLM, requires `n`) -#' - "CCH": Chen-Clyde-Hsu prior (GLM, requires `a`, `b`, optionally `s`) -#' - "EB.local": Empirical Bayes local prior (GLM/Gaussian BAS, requires `alpha` for Gaussian) +#' - "beta.prime": Beta-prime prior (GLM/Gaussian, no additional args) +#' - "CH": Compound Hypergeometric prior (GLM/Gaussian, requires `a`, `b`, optionally `s`) +#' - "EB-local": Empirical Bayes local prior (GLM/Gaussian, requires `a` for Gaussian) +#' - "EB-global": Empirical Bayes local prior (Gaussian, requires `a` for Gaussian) #' - "g-prior": Zellner's g-prior (GLM/Gaussian, requires `g`) -#' - "hyper.g": Hyper-g prior (GLM, requires `a`) -#' - "hyper.g.n": Hyper-g/n prior (GLM, requires `a`, `n`) -#' - "tCCH": Truncated Chen-Clyde-Hsu prior (GLM, requires `a`, `b`, optionally `s`, `rho`, `v`, `k`) -#' - "intrinsic": Intrinsic prior (GLM, requires `n`) -#' - "testBF.prior": Test Bayes factor prior (GLM, requires `g`) -#' - "TG": Truncated Gamma prior (GLM, requires `a`) +#' - "hyper-g": Hyper-g prior (GLM/Gaussian, requires `a`) +#' - "hyper-g-n": Hyper-g/n prior (GLM/Gaussian, requires `a`) +#' - "tCCH": Truncated Compound Hypergeometric prior (GLM/Gaussian, requires `a`, `b`, `s`, `rho`, `v`, `k`) +#' - "intrinsic": Intrinsic prior (GLM/Gaussian, no additional args) +#' - "TG": Truncated Gamma prior (GLM/Gamma, requires `a`, `s`) #' - "Jeffreys": Jeffreys prior (GLM/Gaussian, no additional args) -#' - "uniform": Uniform prior (GLM, no additional args) -#' - "CH": Custom Chen-Hsu prior (Gaussian TCCH, requires `a`, `b`, optionally `s`) -#' - "Hyper-g": Hyper-g prior (Gaussian TCCH, no additional args) -#' - "Uniform": Uniform prior (Gaussian TCCH, no additional args) -#' - "Beta-prime": Beta-prime prior (Gaussian TCCH, no additional args) -#' - "Benchmark": Benchmark prior (Gaussian TCCH, no additional args) -#' - "TruncGamma": Truncated Gamma prior (Gaussian TCCH, requires `at`, `st`) -#' - "ZS adapted": Zellner-Siow adapted prior (Gaussian TCCH, no additional args) -#' - "Robust": Robust prior (Gaussian TCCH, no additional args) -#' - "Hyper-g/n": Hyper-g/n prior (Gaussian TCCH, no additional args) -#' - "Intrinsic": Intrinsic prior (Gaussian TCCH, no additional args) -#' - "hyper-g": Hyper-g prior (Gaussian BAS, requires `alpha`) -#' - "BIC": BIC prior (Gaussian BAS, requires `alpha`) -#' - "ZS-null": Zellner-Siow null prior (Gaussian BAS, requires `alpha`) -#' - "ZS-full": Zellner-Siow full prior (Gaussian BAS, requires `alpha`) -#' - "hyper-g-laplace": Hyper-g Laplace prior (Gaussian BAS, requires `alpha`) -#' - "AIC": AIC prior (Gaussian BAS, requires `alpha`) -#' - "JZS": Jeffreys-Zellner-Siow prior (Gaussian BAS, requires `alpha`) -#' - r: Model complexity penalty (default: 1/length(y)) +#' - "uniform": Uniform prior (GLM/Gaussian, no additional args) +#' - "benchmark": Benchmark prior (Gaussian/GLM, no additional args) +#' - "ZS-adapted": Zellner-Siow adapted prior (Gaussian TCCH, no additional args) +#' - "robust": Robust prior (Gaussian/GLM, no additional args) +#' - "Jeffreys-BIC": Jeffreys prior with BIC approximation of marginal likelihood (Gaussian/GLM) +#' - "ZS-null": Zellner-Siow null prior (Gaussian, requires `a`) +#' - "ZS-full": Zellner-Siow full prior (Gaussian, requires `a`) +#' - "hyper-g-laplace": Hyper-g Laplace prior (Gaussian, requires `a`) +#' - "AIC": AIC prior from BAS (Gaussian, requires penalty `a`) +#' - "BIC": BIC prior from BAS (Gaussian/GLM) +#' - "JZS": Jeffreys-Zellner-Siow prior (Gaussian, requires `a`) +#' - r: Model complexity penalty (default: 1/n) #' - g: Tuning parameter for g-prior (default: max(n, p^2)) #' - a, b, s, v, rho, k: Hyperparameters for various priors -#' - at, st: Additional parameters for TruncGamma prior #' - n: Sample size for some priors (default: length(y)) #' - var: Variance assumption for Gaussian models ("known" or "unknown", default: "unknown") -#' - laplace: Logical for Laplace approximation in GLM (default: FALSE) +#' - laplace: Logical for Laplace approximation in GLM only (default: FALSE) #' #' @return A list with elements: #' \item{crit}{Log marginal likelihood combined with the log prior.} @@ -578,111 +607,181 @@ log_prior <- function (params, complex) { #' @examples #' fbms.mlik.master(rnorm(100), matrix(rnorm(100)), TRUE, list(oc = 1), list(family = "gaussian", prior_beta = "g-prior")) #' -#' @importFrom BAS beta.prime bic.prior CCH EB.local g.prior hyper.g hyper.g.n tCCH intrinsic testBF.prior TG Jeffreys uniform +#' @importFrom BAS beta.prime bic.prior CCH EB.local g.prior hyper.g hyper.g.n tCCH intrinsic TG Jeffreys uniform #' @export fbms.mlik.master <- function(y, x, model, complex, params = list(family = "gaussian", prior_beta = "g-prior", r = exp(-0.5))) { # Extract dimensions n <- length(y) p <- sum(model) - 1 # Number of predictors excluding intercept - + # Set default parameters if not fully specified if (is.null(params$family)) params$family <- "gaussian" if (is.null(params$prior_beta)) params$prior_beta <- "g-prior" if (is.null(params$g)) params$g <- max(p^2, n) - if (is.null(params$r)) params$r <- 1/length(y) - - # Ensure complex has oc if not provided - if (is.null(complex$oc)) complex$oc <- 1 - + if (is.null(params$n)) params$n <- n + if (is.null(params$r)) params$r <- 1/n + + # Ensure complex has oc if not provided, ignore by default + if (is.null(complex$oc)) complex$oc <- 0 + # Homogenize and prepare params for nested calls params_master <- params params_nested <- list(r = params_master$r) - + # Define valid priors for each family - glm_priors <- c("beta.prime", "bic.prior", "CCH", "EB.local", "g-prior", "hyper.g", "hyper.g.n", - "tCCH", "intrinsic", "testBF.prior", "TG", "Jeffreys", "uniform") - gaussian_tcch_priors <- c("CH", "Hyper-g", "Uniform", "Beta-prime", "Benchmark", "TruncGamma", - "ZS adapted", "Robust", "Hyper-g/n", "Intrinsic") - gaussian_bas_priors <- c("g-prior", "hyper-g", "EB.local", "BIC", "ZS-null", "ZS-full", - "hyper-g-laplace", "AIC", "JZS") - + #glm_only_priors <- c("CCH", "tCCH", "TG") + glm_and_gaussian_priors <- c("CH", "tCCH", "TG","beta.prime", "EB-local", "g-prior", "hyper-g", "hyper-g-n", + "intrinsic", "ZS-adapted", "Jeffreys", "uniform", "benchmark", "robust", "Jeffreys-BIC") + gaussian_only_priors <- c("ZS-null", "ZS-full", "hyper-g-laplace","BIC", "AIC", "JZS","EB-global") + + #review a bit + gaussian_not_robust <- c("CH", "tCCH", "ZS-adapted", "TG","beta.prime", "benchmark","Jeffreys") + gaussian_robust <- c("g-prior", "hyper-g", "EB-local","BIC", "Jeffreys-BIC", "ZS-null", "ZS-full", "hyper-g-laplace", + "AIC", "hyper-g-n", "JZS") + gaussian_tcch <- c("CH", "tCCH", "TG","beta.prime", "intrinsic", "ZS-adapted", "uniform","Jeffreys", "benchmark", "robust") + gaussian_bas <- c("g-prior", "hyper-g", "EB-local","ZS-null", "ZS-full", "BIC", "hyper-g-laplace", "AIC", "EB-global", "hyper-g-n", "JZS") + + all_priors <- c(glm_and_gaussian_priors, gaussian_only_priors) + #browser() + # Validate prior_beta + if (!params_master$prior_beta %in% all_priors) { + stop(sprintf("Prior '%s' is not supported. Supported priors: %s", + params_master$prior_beta, paste(all_priors, collapse = ", "))) + } + # Decision logic based on family and prior_beta if (params_master$family %in% c("binomial", "poisson", "gamma")) { - if (!params_master$prior_beta %in% glm_priors) { - stop(sprintf("Prior '%s' is not supported for family '%s'. Supported GLM priors: %s", - params_master$prior_beta, params_master$family, paste(glm_priors, collapse = ", "))) + if (params_master$prior_beta %in% gaussian_only_priors) { + stop(sprintf("Prior '%s' is not supported for GLM family '%s'. Supported GLM priors: %s", + params_master$prior_beta, params_master$family, + paste(c(glm_only_priors, glm_and_gaussian_priors), collapse = ", "))) } - + params_nested$family <- params_master$family if (is.null(params_master$laplace)) params_nested$laplace <- FALSE else params_nested$laplace <- params_master$laplace - - if (params_master$prior_beta == "Jeffreys" && params_master$family == "binomial") { - # Use logistic.loglik for binomial with Jeffreys prior and BIC approximation - result <- logistic.loglik(y, x, model, complex, params_nested) + + #if(params_nested$laplace) + # print("Laplace approximations will be used") + + if (params_master$prior_beta == "Jeffreys-BIC") { + if(params_nested$family == "binomial") + result <- logistic.loglik(y, x, model, complex, params_nested) + else if(params_nested$family%in% c("poisson", "gamma")) + result <- glm.loglik(y, x, model, complex, params_nested) + } else { - # Use glm.logpost.bas for binomial, poisson, or gamma with BAS priors params_nested$prior_beta <- switch( params_master$prior_beta, - "beta.prime" = beta.prime(n = if (is.null(params_master$n)) n else params_master$n), - "bic.prior" = bic.prior(n = if (is.null(params_master$n)) n else params_master$n), - "CCH" = CCH(alpha = if (is.null(params_master$a)) 1 else params_master$a, - beta = if (is.null(params_master$b)) 2 else params_master$b, - s = if (is.null(params_master$s)) 0 else params_master$s), - "EB.local" = EB.local(), - "g-prior" = g.prior(g = if (is.null(params_master$g)) max(n, p + 1) else params_master$g), - "hyper.g" = hyper.g(alpha = if (is.null(params_master$a)) 3 else params_master$a), - "hyper.g.n" = hyper.g.n(alpha = if (is.null(params_master$a)) 3 else params_master$a, - n = if (is.null(params_master$n)) n else params_master$n), - "tCCH" = tCCH(alpha = if (is.null(params_master$a)) 1 else params_master$a, - beta = if (is.null(params_master$b)) 2 else params_master$b, - s = if (is.null(params_master$s)) 0 else params_master$s, - r = if (is.null(params_master$rho)) 3/2 else params_master$rho, - v = if (is.null(params_master$v)) 1 else params_master$v, - theta = if (is.null(params_master$k)) 1 else params_master$k), - "intrinsic" = intrinsic(n = if (is.null(params_master$n)) n else params_master$n), - "testBF.prior" = testBF.prior(g = if (is.null(params_master$g)) max(n, p + 1) else params_master$g), - "TG" = TG(alpha = if (is.null(params_master$a)) 2 else params_master$a), + "beta.prime" = beta.prime(n = n), + "CH" = CCH(alpha = if (is.null(params_master$a)) stop("a must be provided") else params_master$a, + beta = if (is.null(params_master$b)) stop("b must be provided") else params_master$b, + s = if (is.null(params_master$s)) stop("s must be provided") else params_master$s), + "EB-local" = EB.local(), + "g-prior" = g.prior(g = params_master$g), + "hyper-g" = hyper.g(alpha = if (is.null(params_master$a)) stop("a must be provided") else params_master$a), + "tCCH" = tCCH(alpha = if (is.null(params_master$a)) stop("a must be provided") else params_master$a, + beta = if (is.null(params_master$b)) stop("b must be provided") else params_master$b, + s = if (is.null(params_master$s)) stop("s must be provided") else params_master$s, + r = if (is.null(params_master$rho)) stop("rho must be provided") else params_master$rho, + v = if (is.null(params_master$v)) stop("v must be provided") else params_master$v, + theta = if (is.null(params_master$k)) stop("k must be provided") else params_master$k), + "intrinsic" = intrinsic(n = params_master$n), + "TG" = TG(alpha = if (is.null(params_master$a)) stop("a must be provided") else params_master$a), "Jeffreys" = Jeffreys(), - "uniform" = uniform(), + "uniform" = tCCH(alpha = 2, + beta = 2, + s = 0, + r = 0, + v = 1, + theta = 1), + "benchmark" = tCCH(alpha = 0.02, + beta = 0.02*max(n,p^2), + s = 0, + r = 0, + v = 1, + theta = 1), + "ZS-adapted" = tCCH(alpha = 1, + beta = 2, + s = n + 3, + r = 0, + v = 1, + theta = 1), + "TG" = TG(alpha = if (is.null(params_master$a)) stop("a must be provided") else params_master$a), + "robust" = robust(n = if (is.null(params_master$n)) as.numeric(n) else as.numeric(params_master$n)), + "hyper-g-n" = hyper.g.n(alpha = if (is.null(params_master$a)) 3 else params_master$a, + n = params_master$n), + "BIC" = bic.prior(n = if (is.null(params_master$n)) n else params_master$n), stop("Unrecognized prior_beta for GLM: ", params_master$prior_beta) ) result <- glm.logpost.bas(y, x, model, complex, params_nested) } } else if (params_master$family == "gaussian") { - if (!(params_master$prior_beta %in% c(glm_priors, gaussian_tcch_priors, gaussian_bas_priors))) { - stop(sprintf("Prior '%s' is not supported for family 'gaussian'. Supported priors: %s, %s, %s", - params_master$prior_beta, paste(glm_priors, collapse = ", "), - paste(gaussian_tcch_priors, collapse = ", "), paste(gaussian_bas_priors, collapse = ", "))) + + if (params_master$prior_beta %in% gaussian_not_robust) { + warning(sprintf("Prior '%s' is not reccomended supported for Gaussian family '%s'. Can be unstable for strong signals (R^2 > 0.9). Reccomended priors under Gaussian family: %s", + params_master$prior_beta, params_master$family, + paste(gaussian_robust, collapse = ", "))) } - + params_nested$r <- params_master$r - - if (params_master$prior_beta == "g-prior" && is.null(params_master$method.num)) { - # Use gaussian.loglik.g for Zellner's g-prior - if (is.null(params_master$g)) params_nested$g <- max(n, p^2) else params_nested$g <- params_master$g - result <- gaussian.loglik.g(y, x, model, complex, params_nested) - } else if (params_master$prior_beta == "Jeffreys" && is.null(params_master$method.num)) { - # Use gaussian.loglik for Jeffreys prior with BIC approximation + + if (params_master$prior_beta %in% gaussian_tcch) { + + params_nested$prior_beta <- switch( + params_master$prior_beta, + "beta.prime" = list("beta.prime"), + "CH" = list("CH",a = if (is.null(params_master$a)) stop("a must be provided") else params_master$a, + b = if (is.null(params_master$b)) stop("b must be provided") else params_master$b, + s = if (is.null(params_master$s)) stop("s must be provided") else params_master$s), + "tCCH" = list("tCCH", a = if (is.null(params_master$a)) stop("a must be provided") else params_master$a, + b = if (is.null(params_master$b)) stop("b must be provided") else params_master$b, + s = if (is.null(params_master$s)) stop("s must be provided") else params_master$s, + rho = if (is.null(params_master$rho)) stop("rho must be provided") else params_master$rho, + v = if (is.null(params_master$v)) stop("v must be provided") else params_master$v, + k = if (is.null(params_master$k)) stop("k must be provided") else params_master$k), + "intrinsic" = list("intrinsic"), + "TG" = list("TG",a = if (is.null(params_master$a)) stop("a must be provided") else params_master$a, + s = if (is.null(params_master$s)) stop("s must be provided") else params_master$s), + "Jeffreys" = list("Jeffreys"), + "ZS-adapted" = list("ZS-adapted"), + "benchmark" = list("benchmark"), + "robust" = list("robust"), + "uniform" = list("uniform"), + stop("Unrecognized prior_beta for Gaussian GLM: ", params_master$prior_beta) + ) + result <- gaussian_tcch_log_likelihood(y, x, model, complex, params_nested) + + }else if (params_master$prior_beta == "Jeffreys-BIC") { if (is.null(params_master$var)) params_nested$var <- "unknown" else params_nested$var <- params_master$var result <- gaussian.loglik(y, x, model, complex, params_nested) - } else if (params_master$prior_beta %in% gaussian_tcch_priors) { - # Use gaussian_tcch_log_likelihood for TCCH priors - params_nested$prior_beta <- params_master$prior_beta - if (!is.null(params_master$a)) params_nested$a <- params_master$a - if (!is.null(params_master$b)) params_nested$b <- params_master$b - if (!is.null(params_master$s)) params_nested$s <- params_master$s - if (!is.null(params_master$v)) params_nested$v <- params_master$v - if (!is.null(params_master$k)) params_nested$k <- params_master$k - if (!is.null(params_master$at)) params_nested$at <- params_master$at - if (!is.null(params_master$st)) params_nested$st <- params_master$st - result <- gaussian_tcch_log_likelihood(y, x, model, complex, params_nested) - } else if (params_master$prior_beta %in% gaussian_bas_priors) { - # Use lm.logpost.bas for BAS priors - params_nested$prior_beta <- params_master$prior_beta - if (is.null(params_master$alpha)) params_nested$alpha <- max(n, (p + 1)^2) else params_nested$alpha <- params_master$alpha - result <- lm.logpost.bas(y, x, model, complex, params_nested) + } else if (params_master$prior_beta %in% gaussian_bas) { + + params_nested$prior_beta <- switch( + params_master$prior_beta, + "g-prior" = 0, + "hyper-g" = 1, + "EB-local" = 2, + "BIC" = 3, + "ZS-null" = 4, + "ZS-full" = 5, + "hyper-g-laplace" = 6, + "AIC" = 7, + "EB-global" = 2, + "hyper-g-n" = 8, + "JZS" = 9 + ) + if(params_master$prior_beta == "g-prior") + { + if (!is.null(params_master$g)) params_nested$g <- params_master$g else stop("g must be provided") + result <- gaussian.loglik.g(y, x, model, complex, params_nested) + } + else{ + if (!is.null(params_master$a)) params_nested$alpha <- params_master$a else params_nested$alpha = -1 + result <- lm.logpost.bas(y, x, model, complex, params_nested) + } + } else { - stop(sprintf("Prior '%s' is not supported for Gaussian family in this context.", params_master$prior_beta)) + stop("Unexpected error in prior_beta logic for Gaussian.") } } else { stop("Unsupported family: ", params_master$family, ". Supported families are 'binomial', 'poisson', 'gamma', or 'gaussian'.") diff --git a/R/mjmcmc.R b/R/mjmcmc.R index 199261b..12cdb2b 100644 --- a/R/mjmcmc.R +++ b/R/mjmcmc.R @@ -31,8 +31,24 @@ #' plot(result) #' #' @export mjmcmc -mjmcmc <- function (x, y, loglik.pi = gaussian.loglik, mlpost_params = NULL, fixed = 0, N = 100, probs = NULL, params = NULL, sub = FALSE, verbose = TRUE) { +mjmcmc <- function ( + x, + y, + loglik.pi = gaussian.loglik, + mlpost_params = NULL, + intercept = FALSE, + fixed = 0, + N = 100, + probs = NULL, + params = NULL, + sub = FALSE, + verbose = TRUE +) { # Verify that data is well-formed + if (intercept) { + x <- cbind(1, x) + fixed <- fixed + 1 + } labels <- names(x) data <- check.data(x, y, fixed, verbose) @@ -63,6 +79,7 @@ mjmcmc <- function (x, y, loglik.pi = gaussian.loglik, mlpost_params = NULL, fix # Return formatted results result$fixed <- data$fixed result$labels <- labels + result$intercept <- intercept class(result) <- "mjmcmc" return(result) } diff --git a/R/parallel.R b/R/parallel.R index bb8d7cc..fa3eadc 100644 --- a/R/parallel.R +++ b/R/parallel.R @@ -94,7 +94,10 @@ rmclapply <- function(runs, args, fun, mc.cores = NULL) { #' #' @export mjmcmc.parallel <- function(runs = 2, cores = getOption("mc.cores", 2L), ...) { - results <- rmclapply(seq_len(runs), args = list(...), mc.cores = cores, fun = mjmcmc) + results <- list() + results$chains <- rmclapply(seq_len(runs), args = list(...), mc.cores = cores, fun = mjmcmc) + results$fixed <- results$chains[[1]]$fixed + results$intercept <- results$chains[[1]]$intercept class(results) <- "mjmcmc_parallel" gc() return(results) diff --git a/R/predict.R b/R/predict.R index 0c71949..39887ef 100644 --- a/R/predict.R +++ b/R/predict.R @@ -72,7 +72,7 @@ predict.gmjmcmc <- function (object, x, link = function(x) x, quantiles = c(0.02 transforms.bak <- set.transforms(object$transforms) x <- impute_x(object, x) - merged <- merge_results(list(object), data = list(x = x), populations = pop, tol = tol) + merged <- merge_results(list(object), data = list(x = x, object$fixed), populations = pop, tol = tol) set.transforms(transforms.bak) return(predict.gmjmcmc_merged(merged, x, link, quantiles)) } @@ -124,10 +124,13 @@ predict.gmjmcmc.2 <- function (object, x, link = function(x) x, quantiles = c(0. #' @export predict.gmjmcmc_merged <- function (object, x, link = function(x) x, quantiles = c(0.025, 0.5, 0.975), pop = NULL, tol = 0.0000001, ...) { x <- impute_x(object, x) + if (object$intercept) { + x <- cbind(1, x) + } transforms.bak <- set.transforms(object$transforms) if (!is.null(pop)) - object <- merge_results(object$results.raw, pop, 2, tol, data = x) + object <- merge_results(object$results.raw, pop, 2, tol, data = list(x = x, fixed = object$fixed)) preds <- list() for (i in seq_along(object$results)) { @@ -183,10 +186,14 @@ predict.gmjmcmc_merged <- function (object, x, link = function(x) x, quantiles = predict.mjmcmc <- function (object, x, link = function(x) x, quantiles = c(0.025, 0.5, 0.975), ...) { # Select the models and features to predict from at this iteration x <- impute_x(object, x) + + if (object$intercept) { + x <- cbind(1, x) + } models <- c(object$models, object$lo.models)[object$model.probs.idx] - yhat <- matrix(0, nrow=nrow(x), ncol=length(models)) + yhat <- matrix(0, nrow = nrow(x), ncol = length(models)) for (k in seq_along(models)) { # Models which have 0 weight are skipped since they may also be invalid, and would not influence the predictions. if (models[[k]]$crit == -.Machine$double.xmax) next @@ -212,19 +219,13 @@ predict.mjmcmc <- function (object, x, link = function(x) x, quantiles = c(0.025 #' #' @export predict.mjmcmc_parallel <- function (object, x, link = function(x) x, quantiles = c(0.025, 0.5, 0.975), ...) { - max.crits <- numeric() x <- impute_x(object, x) - - for (i in seq_along(object)) { - max.crits <- c(max.crits, object[[i]]$best.crit) - } + + max.crits <- sapply(object$chains, function (x) x$best.crit) max.crit <- max(max.crits) result.weights <- exp(max.crits - max.crit) / sum(exp(max.crits - max.crit)) - preds <- list() - for (i in seq_along(object)) { - preds[[i]] <- predict.mjmcmc(object[[i]], x, link, quantiles) - } + preds <- lapply(object$chains, predict.mjmcmc,x, link, quantiles) aggr <- list() aggr$mean <- 0 * preds[[1]]$mean diff --git a/R/results.R b/R/results.R index 2d153e6..6bd6562 100644 --- a/R/results.R +++ b/R/results.R @@ -107,7 +107,9 @@ merge_results <- function (results, populations = NULL, complex.measure = NULL, accept.tot <- results[[i]]$accept.tot best <- results[[i]]$best for (item in names(results[[i]])) { - if (!(item %in% (c("accept.tot", "best", "transforms", "fixed")))) results[[i]][[item]] <- results[[i]][[item]][pops.use[[i]]] + if (!(item %in% (c("accept.tot", "best", "transforms", "fixed", "intercept")))) { + results[[i]][[item]] <- results[[i]][[item]][pops.use[[i]]] + } } results[[i]]$accept.tot <- accept.tot results[[i]]$best <- best @@ -129,6 +131,7 @@ merge_results <- function (results, populations = NULL, complex.measure = NULL, if (is.null(data)) mock.data <- list(x = matrix(runif((feat.count)^2, -100, 100), ncol = feat.count)) else mock.data <- data mock.data$fixed = results[[1]]$fixed + if (results[[1]]$intercept) mock.data$x <- cbind(1, mock.data$x) mock.data.precalc <- precalc.features(mock.data, features)$x[ , seq_len(feat.count) + results[[1]]$fixed, drop = FALSE] @@ -167,7 +170,8 @@ merge_results <- function (results, populations = NULL, complex.measure = NULL, best.log.posteriors = bests, rep.thread = pw$thread.best, transforms = results[[1]]$transforms, - fixed = results[[1]]$fixed + fixed = results[[1]]$fixed, + intercept = results[[1]]$intercept ) attr(merged, "class") <- "gmjmcmc_merged" return(merged) @@ -230,7 +234,7 @@ population.weigths <- function (results, pops.use) { #' #' @export model.string model.string <- function (model, features, link = "I", round = 2) { - modelstring <- paste0(sapply(features[model], print.feature, alphas = TRUE, round = round), collapse="+") + modelstring <- paste0(sapply(features[model], print.feature, alphas = TRUE, round = round), collapse = "+") modelfun <- set_alphas(modelstring) modelfun$formula <- paste0(link, "(", modelfun$formula, ")") return(modelfun) @@ -293,14 +297,26 @@ get.mpm.model <- function(result, y, x, labels = F, family = "gaussian", loglik. if (family == "binomial") loglik.pi <- logistic.loglik - sm <- summary(result, labels = labels, verbose = FALSE) - mpm <- sm$feats.strings[sm$marg.probs > 0.5] - - x.precalc <- model.matrix( - as.formula(paste0("~I(", paste0(mpm, collapse = ")+I("), ")")), - data = x) + if (is(result, "mjmcmc.parallel")) { + models <- unlist(lapply(object, function (x) x$models), recursive = FALSE) + marg.probs <- marginal.probs.renorm(models)$probs + features <- object[[1]]$populations + } else if (is(result, "gmjmcmc")) { + best_pop <- which.max(unlist(object$best.margs)) + marg.probs <- object$marg.probs[[best_pop]] + features <- object$populations[[best_pop]] + } else if (is(result, "gmjmcmc.parallel") || is(result, "mjmcmc")) { + marg.probs <- object$marg.probs + features <- object$features + } + features <- features[marg.probs > 0.5] + + if (result$intercept) { + x <- cbind(1, x) + } + precalc <- precalc.features(list(x = x, y = y, fixed = result$fixed), features) - model <- loglik.pi(y = y, x = x.precalc, model = rep(TRUE, length(mpm) + 1), complex = list(oc = 0), params = params) + model <- loglik.pi(y = y, x = precalc$x, model = rep(TRUE, length(features) + result$fixed), complex = list(oc = 0), params = params) class(model) <- "bgnlm_model" model$crit <- "MPM" return(model) @@ -349,8 +365,8 @@ get.best.model <- function(result, labels = FALSE) { if (length(labels) == 1 && labels[1] == FALSE && length(result[[1]]$labels) > 0) { labels <- result[[1]]$labels } - best.chain <- which.max(sapply(result,function(x)x$best.crit)) - return(get.best.model.mjmcmc(result[[best.chain]], labels)) + best.chain <- which.max(sapply(result$chains, function (x) x$best.crit)) + return(get.best.model.mjmcmc(result$chains[[best.chain]], labels)) } if (is(result,"gmjmcmc")) { @@ -373,7 +389,9 @@ get.best.model.gmjmcmc <- function (result, labels) { best.pop.id <- which.max(sapply(result$best.margs,function(x)x)) best.mod.id <- which.max(sapply(result$models[[best.pop.id]],function(x)x$crit)) ret <- result$models[[best.pop.id]][[best.mod.id]] - names(ret$coefs) <- c("Intercept",sapply(result$populations[[best.pop.id]],print.feature,labels = labels)[which(ret$model)]) + coefnames <- sapply(result$populations[[best.pop.id]], print.feature, labels = labels)[ret$model] + if (result$intercept) coefnames <- c("Intercept", coefnames) + names(ret$coefs) <- coefnames class(ret) = "bgnlm_model" return(ret) } @@ -384,7 +402,9 @@ get.best.model.mjmcmc <- function (result, labels) { } best.mod.id <- which.max(sapply(result$models,function(x)x$crit)) ret <- result$models[[best.mod.id]] - names(ret$coefs) <- c("Intercept",sapply(result$populations,print.feature,labels = labels)[which(ret$model)]) + coefnames <- sapply(result$populations, print.feature, labels = labels)[ret$model] + if (result$intercept) coefnames <- c("Intercept", coefnames) + names(ret$coefs) <- coefnames class(ret) = "bgnlm_model" return(ret) } @@ -522,13 +542,13 @@ plot.mjmcmc_parallel <- function (x, count = "all", ...) { merge_mjmcmc_parallel <- function (x) { run.weights <- run.weigths(x) - marg.probs <- x[[1]]$marg.probs * run.weights[1] - for (i in seq_along(x[-1])) { - marg.probs <- marg.probs + x[[i]]$marg.probs * run.weights[i] + marg.probs <- x$chains[[1]]$marg.probs * run.weights[1] + for (i in seq_along(x[-c(1, (-1:0 + length(x)))])) { + marg.probs <- marg.probs + x$chains[[i]]$marg.probs * run.weights[i] } return(structure( list( - features = sapply(x[[1]]$populations, print), + features = sapply(x$chains[[1]]$populations, print), marg.probs = marg.probs, results = x ), @@ -538,7 +558,7 @@ merge_mjmcmc_parallel <- function (x) { run.weigths <- function (results) { - best.crits <- sapply(results, function (x) x$best.crit) + best.crits <- sapply(results$chains, function (x) x$best.crit) max.crit <- max(best.crits) return(exp(best.crits - max.crit) / sum(exp(best.crits - max.crit))) } diff --git a/R/summary.R b/R/summary.R index 10e731f..ce3d6ab 100644 --- a/R/summary.R +++ b/R/summary.R @@ -20,8 +20,9 @@ #' @export summary.gmjmcmc <- function (object, pop = "best", tol = 0.0001, labels = FALSE, effects = NULL, data = NULL, verbose = TRUE, ...) { transforms.bak <- set.transforms(object$transforms) - if (length(labels) == 1 && labels[1] == FALSE && length(object$labels) > 0) + if (length(labels) == 1 && labels[1] == FALSE && length(object$labels) > 0) { labels = object$labels + } if (pop == "all") { results <- list() results[[1]] <- object @@ -114,14 +115,14 @@ summary.gmjmcmc_merged <- function (object, tol = 0.0001, labels = FALSE, effect best <- max(sapply(object$results, function (y) y$best)) feats.strings <- sapply(object$features, FUN = function(x) print.feature(x = x, labels = labels, round = 2)) - if (!is.null(effects) & !is.null(labels)) { effects <- compute_effects(object,labels = labels, quantiles = effects) } obj <- summary_internal( best = object$crit.best, - feats.strings, object$marg.probs, + feats.strings = feats.strings, + marg.probs = object$marg.probs, effects = effects, best.pop = object$pop.best, thread.best = object$thread.best, @@ -154,9 +155,13 @@ summary.gmjmcmc_merged <- function (object, tol = 0.0001, labels = FALSE, effect #' #' @export summary.mjmcmc <- function (object, tol = 0.0001, labels = FALSE, effects = NULL, verbose = TRUE, ...) { - if (length(labels) == 1 && labels[1] == FALSE && length(object$labels) > 0) - labels = object$labels - return(summary.mjmcmc_parallel(list(object), tol = tol, labels = labels, effects = effects, verbose = verbose)) + return(summary.mjmcmc_parallel( + list(chains = list(object), fixed = object$fixed, intercept = object$intercept), + tol = tol, + labels = labels, + effects = effects, + verbose = verbose + )) } #' Function to print a quick summary of the results @@ -180,18 +185,15 @@ summary.mjmcmc <- function (object, tol = 0.0001, labels = FALSE, effects = NULL summary.mjmcmc_parallel <- function (object, tol = 0.0001, labels = FALSE, effects = NULL, verbose = TRUE, ...) { # Get features as strings for printing if (length(labels) == 1 && labels[1] == FALSE && length(object[[1]]$labels) > 0) { - labels = object[[1]]$labels + labels = object$chains[[1]]$labels } - feats.strings <- sapply(object[[1]]$populations, FUN = function(x) print.feature(x = x, labels = labels, round = 2)) + feats.strings <- sapply(object$chains[[1]]$populations, FUN = function(x) print.feature(x = x, labels = labels, round = 2)) # Get marginal posterior of features - models <- unlist(lapply(object, function (x) x$models), recursive = FALSE) + models <- unlist(lapply(object$chains, function (x) x$models), recursive = FALSE) marg.probs <- marginal.probs.renorm(models)$probs - best <- max(sapply(object, function (x) x$best)) + best <- max(sapply(object$chains, function (x) x$best)) if (!is.null(effects) & !is.null(labels)) { - if (is.list(object)) - effects <- compute_effects(object[[1]],labels = labels, quantiles = effects) - else - effects <- compute_effects(object,labels = labels, quantiles = effects) + effects <- compute_effects(object$chains[[1]], labels = labels, quantiles = effects) } return(summary_internal(best, feats.strings, marg.probs, effects, tol = tol, verbose = verbose)) } diff --git a/tests/testthat/test_fbms.R b/tests/testthat/test_fbms.R new file mode 100644 index 0000000..6affc4b --- /dev/null +++ b/tests/testthat/test_fbms.R @@ -0,0 +1,96 @@ +# Title : General tests +# Objective : Test the code +# Created by: jonlachmann +# Created on: 2021-02-25 + +context("MJMCMC") + +test_that("Test (G)MJMCMC through the fbms function", { + set.seed(123) + x <- matrix(rnorm(300), 100) + y <- rnorm(100, 0, 0.5) + rowSums(x[, 1:2]) + y_shift <- y + 10 + y_sin <- rnorm(100, 0, 0.05) + sin(x[, 1]) * 3 + sin(x[, 2]) * 2 + y_sin_shift <- y_sin + 10 + + validate.model <- function (model, x, y) { + expect_true(all(c(model$marg.probs[1:2] > 0.9, model$marg.probs[3] < 0.9))) + summary <- summary(model, labels = c("a", "b", "c"), tol = -1) + expect_true(all(c(summary$marg.probs[1:2] > 0.9, summary$marg.probs[3] < 0.9))) + plot(model) + pred <- predict(model, x) + # Handle paralell runs + if (!is.null(pred$aggr)) { + pred <- pred$aggr + } + rmse <- sqrt(mean((pred$mean - y)^2)) + expect_true(rmse < 0.6) + best_model <- get.best.model(model) + mpm_model <- get.mpm.model(model, y, x) + } + + validate.gmodel <- function (model, x, y) { + suppressMessages(summary <- summary(model, labels = c("a", "b", "c"), tol = -1)) + expect_true(all(c(summary$marg.probs[1:2] > 0.9, summary$marg.probs[-(1:2)] < 0.9))) + expect_true(all(summary$feats.strings[1:2] %in% c("sin(a)", "sin(b)"))) + plot(model) + pred <- predict(model, x) + # Handle paralell runs + if (!is.null(pred$aggr)) { + pred <- pred$aggr + } + rmse <- sqrt(mean((pred$mean - y)^2)) + expect_true(rmse < 0.2) + best_model <- get.best.model(model) + mpm_model <- get.mpm.model(model, y, x) + } + + params <- gen.params.gmjmcmc(list(x = x, y = y_sin)) + probs <- gen.probs.gmjmcmc("sin") + probs$gen <- c(0, 1, 0, 0) + params$feat$D <- 1 + params$feat$L <- 2 + + # No intercept + data <- as.data.frame(cbind(y, x)) + colnames(data) <- c("y", "a", "b", "c") + mod1 <- fbms(y ~ . - 1, family = "gaussian", method = "mjmcmc", data = data, verbose = FALSE) + mod1p <- fbms(y ~ . - 1, family = "gaussian", method = "mjmcmc.parallel", data = data, verbose = FALSE) + validate.model(mod1, x, y) + validate.model(mod1p, x, y) + + data$y <- y_sin + gmod1 <- fbms(y ~ . - 1, family = "gaussian", method = "gmjmcmc", data = data, transforms = "sin", params = params, probs = probs, P = 20, verbose = FALSE) + gmod1p <- fbms(y ~ . - 1, family = "gaussian", method = "gmjmcmc.parallel", data = data, transforms = "sin", params = params, probs = probs, P = 20, verbose = FALSE) + validate.gmodel(gmod1, x, y_sin) + validate.gmodel(gmod1p, x, y_sin) + + # Model defined intercept + data$y <- y_shift + mod2 <- fbms(y ~ ., family = "gaussian", method = "mjmcmc", data = data, verbose = FALSE) + mod2p <- fbms(y ~ ., family = "gaussian", method = "mjmcmc.parallel", data = data, verbose = FALSE) + validate.model(mod2, x, y_shift) + validate.model(mod2p, x, y_shift) + + data$y <- y_sin_shift + gmod2 <- fbms(y ~ ., family = "gaussian", method = "gmjmcmc", data = data, transforms = "sin", params = params, probs = probs, P = 20, verbose = FALSE) + gmod2p <- fbms(y ~ ., family = "gaussian", method = "gmjmcmc.parallel", data = data, transforms = "sin", params = params, probs = probs, P = 20, verbose = FALSE) + validate.gmodel(gmod2, x, y_sin_shift) + validate.gmodel(gmod2p, x, y_sin_shift) + + # User defined intercept + data <- cbind(data[, 1], 1, data[, -1]) + colnames(data) <- c("y", "const", "a", "b", "c") + data$y <- y_shift + mod3 <- fbms(y ~ . - 1, family = "gaussian", method = "mjmcmc", data = data, fixed = 1, verbose = FALSE) + mod3p <- fbms(y ~ . - 1, family = "gaussian", method = "mjmcmc.parallel", data = data, fixed = 1, verbose = FALSE) + validate.model(mod3, cbind(1, x), y_shift) + validate.model(mod3p, cbind(1, x), y_shift) + + data$y <- y_sin_shift + gmod3 <- fbms(y ~ . - 1, family = "gaussian", method = "gmjmcmc", data = data, transforms = "sin", params = params, probs = probs, P = 20, fixed = 1, verbose = FALSE) + gmod3p <- fbms(y ~ . - 1, family = "gaussian", method = "gmjmcmc.parallel", data = data, transforms = "sin", params = params, probs = probs, P = 20, fixed = 1, verbose = FALSE) + validate.gmodel(gmod3, cbind(1, x), y_sin_shift) + validate.gmodel(gmod3p, cbind(1, x), y_sin_shift) +}) + diff --git a/tests/testthat/test_local_optim.R b/tests/testthat/test_local_optim.R index 76357d9..76bdf39 100644 --- a/tests/testthat/test_local_optim.R +++ b/tests/testthat/test_local_optim.R @@ -17,7 +17,7 @@ test_that("Testing Greedy algorithm", { # Optimize empty model but dont allow all indices, should set all to true except disallowed optmod <- greedy.optim( c(F, F, F, F, F, F, F, F, F, F), - NULL, + list(fixed = 0), loglik.tester, c(F, F, T, T, T, T, T, T, T, T), NULL, diff --git a/tests/testthat/test_mjmcmc.R b/tests/testthat/test_mjmcmc.R index 5b86b94..14a1bb2 100644 --- a/tests/testthat/test_mjmcmc.R +++ b/tests/testthat/test_mjmcmc.R @@ -5,50 +5,79 @@ context("MJMCMC") -test_that("Testing MJMCMC algorithm", { - # Dummy test likelihood function - loglik.tester <- function (y, x, model, complex, params) { - lmmod <- lm.fit(x[, model, drop = FALSE], y) - n <- length(y) - k <- length(lmmod$coefficients) - rss <- sum(resid(lmmod)^2) - aic <- n * log(rss / n) + 2 * k - return(list(crit = aic, coefs = lmmod$coefficients)) +test_that("Test (G)MJMCMC", { + set.seed(123) + x <- matrix(rnorm(300), 100) + y <- rnorm(100, 0, 0.5) + rowSums(x[, 1:2]) + y_shift <- y + 10 + y_sin <- rnorm(100, 0, 0.05) + sin(x[, 1]) * 3 + sin(x[, 2]) * 2 + y_sin_shift <- y_sin + 10 + + validate.model <- function (model, x, y) { + expect_true(all(c(model$marg.probs[1:2] > 0.9, model$marg.probs[3] < 0.9))) + summary <- summary(model, labels = c("a", "b", "c"), tol = -1) + expect_true(all(c(summary$marg.probs[1:2] > 0.9, summary$marg.probs[3] < 0.9))) + plot(model) + pred <- predict(model, x) + # Handle paralell runs + if (!is.null(pred$aggr)) { + pred <- pred$aggr + } + rmse <- sqrt(mean((pred$mean - y)^2)) + expect_true(rmse < 0.6) + best_model <- get.best.model(model) + mpm_model <- get.mpm.model(model, y, x) + } + + validate.gmodel <- function (model, x, y) { + summary <- summary(model, labels = c("a", "b", "c"), tol = -1) + expect_true(all(c(summary$marg.probs[1:2] > 0.9, summary$marg.probs[-(1:2)] < 0.9))) + expect_true(all(summary$feats.strings[1:2] %in% c("sin(a)", "sin(b)"))) + plot(model) + pred <- predict(model, x) + # Handle paralell runs + if (!is.null(pred$aggr)) { + pred <- pred$aggr + } + rmse <- sqrt(mean((pred$mean - y)^2)) + expect_true(rmse < 0.2) + best_model <- get.best.model(model) + mpm_model <- get.mpm.model(model, y, x) } - data <- matrix(rnorm(600), 100) - resm <- mjmcmc(data[,-1], data[,1], loglik.tester, 1) - summary(resm, labels = c("a", "b", "c", "d", "e")) - plot(resm) - predm <- predict(resm, data[, -1, drop = FALSE]) - - resg <- gmjmcmc(data[,-1], data[,1], loglik.tester, NULL, c("p0", "exp_dbl"), 1) - summary(resg) - plot(resg) - prediction <- predict(resg, data[, -1, drop = FALSE]) - - respm <- mjmcmc.parallel(2, 2, data[,-1], data[,1], loglik.tester) - summary(respm) - plot(respm) - pred_pm <- predict(respm, data[, -1, drop = FALSE]) - - respg <- gmjmcmc.parallel(2, 2, NULL, data[,-1], data[,1], loglik.tester, NULL, c("p0", "exp_dbl")) - summary(respg) - plot(respg) - pred_pg <- predict(respg, data[, -1, drop = FALSE]) - - fbms_result <- fbms( - X1 ~ ., - family = "gaussian", - method = "gmjmcmc.parallel", - data = data.frame(matrix(rnorm(600), 100)), - transforms = c("sin","cos"), - P = 10, - runs = 1, - cores = 1 - ) - - # Dummy expect to run the test - expect_true(is.list(fbms_result)) + params <- gen.params.gmjmcmc(list(x = x, y = y_sin)) + probs <- gen.probs.gmjmcmc("sin") + probs$gen <- c(0, 1, 0, 0) + params$feat$D <- 1 + params$feat$L <- 2 + + # No intercept + mod1 <- mjmcmc(x, y, gaussian.loglik) + mod1p <- mjmcmc.parallel(2, 2, x, y, gaussian.loglik) + validate.model(mod1, x, y) + validate.model(mod1p, x, y) + + gmod1 <- gmjmcmc(x, y_sin, gaussian.loglik, transforms = "sin", params = params, probs = probs, P = 20, verbose = FALSE) + gmod1p <- gmjmcmc.parallel(2, 2, x = x, y = y_sin, loglik.pi = gaussian.loglik, transforms = "sin", params = params, probs = probs, verbose = FALSE) + validate.gmodel(gmod1, x, y_sin) + validate.gmodel(gmod1p, x, y_sin) + + # Model defined intercept + mod2 <- mjmcmc(x, y_shift, gaussian.loglik, intercept = TRUE) + validate.model(mod2, x, y_shift) + + gmod2 <- gmjmcmc(x, y_sin_shift, gaussian.loglik, transforms = "sin", params = params, probs = probs, intercept = TRUE, P = 20, verbose = FALSE) + gmod2p <- gmjmcmc.parallel(2, 2, x = x, y = y_sin_shift, loglik.pi = gaussian.loglik, transforms = "sin", params = params, probs = probs, intercept = TRUE, verbose = FALSE) + validate.gmodel(gmod2, x, y_sin_shift) + validate.gmodel(gmod2p, x, y_sin_shift) + + # User defined intercept + mod3 <- mjmcmc(cbind(1, x), y_shift, gaussian.loglik, fixed = 1) + validate.model(mod3, cbind(1, x), y_shift) + + gmod3 <- gmjmcmc(cbind(1, x), y_sin_shift, gaussian.loglik, transforms = "sin", params = params, probs = probs, fixed = 1, P = 20, verbose = FALSE) + gmod3p <- gmjmcmc.parallel(2, 2, x = cbind(1, x), y = y_sin_shift, loglik.pi = gaussian.loglik, transforms = "sin", params = params, probs = probs, fixed = 1, verbose = FALSE) + validate.gmodel(gmod3, cbind(1, x), y_sin_shift) + validate.gmodel(gmod3p, cbind(1, x), y_sin_shift) }) -- GitLab From ddd044d5fda8772465a42e0b6a4d05880521425c Mon Sep 17 00:00:00 2001 From: jonlachmann Date: Mon, 31 Mar 2025 20:06:04 +0200 Subject: [PATCH 05/36] * New test framework for priors and new functions to set up all priors. --- R/fbms.R | 170 ++++++++++++-- R/likelihoods.R | 234 +++++++++----------- R/predict.R | 13 +- R/results.R | 27 ++- tests/testthat/test_priors.R | 49 ++++ tests_current/Ex1_Sec3.R | 28 ++- tests_current/Ex8_Sec5.2_logic_regression.R | 4 +- 7 files changed, 333 insertions(+), 192 deletions(-) create mode 100644 tests/testthat/test_priors.R diff --git a/R/fbms.R b/R/fbms.R index 0f92fd3..314895c 100644 --- a/R/fbms.R +++ b/R/fbms.R @@ -33,23 +33,33 @@ #' #' @seealso \code{\link{mjmcmc}}, \code{\link{gmjmcmc}}, \code{\link{gmjmcmc.parallel}} #' @export -fbms <- function (formula = NULL, family = "gaussian", - beta_prior = list(type = "g-prior", g = 5), - model_prior = list(r = exp(-0.5)), - data = NULL, impute = FALSE, - loglik.pi = gaussian.loglik, - method = "mjmcmc", verbose = TRUE, ...) { +fbms <- function ( + formula = NULL, + family = "gaussian", + beta_prior = list(type = "g-prior", g = 5), + model_prior = list(r = exp(-0.5)), + data = NULL, + impute = FALSE, + loglik.pi = gaussian.loglik, + method = "mjmcmc", + verbose = TRUE, + ... +) { if (is.list(beta_prior) || is.list(model_prior)) { - mlpost_params <- gen.mlpost.params(beta_prior, model_prior) - loglik.pi <- select.mlpost.fun(beta_prior, model_prior, family) + mlpost_params <- model_prior + loglik.pi <- select.mlpost.fun(beta_prior$type, model_prior, family) + mlpost_params$beta_prior <- gen.mlpost.params(beta_prior$type, model_prior, beta_prior, ncol(data) - 1, nrow(data)) + mlpost_params$beta_prior$type <- beta_prior$type + } else { + if (family == "gaussian") + loglik.pi <- gaussian.loglik + else if (family == "binomial") + loglik.pi <- logistic.loglik + else if (family == "custom") + loglik.pi <- loglik.pi } - if (family == "gaussian") - loglik.pi <- gaussian.loglik - else if (family == "binomial") - loglik.pi <- logistic.loglik - else if (family == "custom") - loglik.pi <- loglik.pi + if (!is.null(formula)) { if (missing(data)) { data <- environment(formula) @@ -57,9 +67,9 @@ fbms <- function (formula = NULL, family = "gaussian", na.opt <- getOption("na.action") if (impute) - options(na.action='na.pass') + options(na.action = 'na.pass') else - options(na.action='na.omit') + options(na.action = 'na.omit') mf <- match.call(expand.dots = FALSE) m <- match(c("formula", "data"), names(mf), 0L) mf <- mf[c(1L, m)] @@ -67,7 +77,6 @@ fbms <- function (formula = NULL, family = "gaussian", mf[[1L]] <- quote(stats::model.frame) mf <- eval(mf, parent.frame()) - Y <- model.response(mf, "any") X <- model.matrix(formula, data = data) intercept <- attr(terms(formula, data = data), "intercept") == 1 @@ -101,6 +110,7 @@ fbms <- function (formula = NULL, family = "gaussian", } } else { Y <- data[, 1] + X <- data[, -1, drop = FALSE] intercept <- TRUE imputed <- NULL na.opt <- getOption("na.action") @@ -129,11 +139,131 @@ fbms <- function (formula = NULL, family = "gaussian", return(res) } -gen.mlpost.params <- function (beta_prior, model_prior) { - return(list()) +gen.mlpost.params <- function (beta_prior, model_prior, user_params, p, n) { + if (beta_prior == "beta.prime") { + return(BAS::beta.prime(n = n)) + } else if (beta_prior == "CH") { + check_required_params(c("a", "b", "s"), user_params, beta_prior) + return(BAS::CCH(alpha = user_params$a, beta = user_params$b, s = user_params$s)) + } else if (beta_prior == "EB-local") { + return(BAS::EB.local()) + } else if (beta_prior == "g-prior") { + if (is.null(user_params$g)) { + user_params$g <- max(p^2, n) + } + return(BAS::g.prior(user_params$g)) + } else if (beta_prior == "hyper-g") { + check_required_params("a", user_params, beta_prior) + return(BAS::hyper.g(alpha = user_params$a)) + } else if (beta_prior == "tCCH") { + check_required_params(c("a", "b", "s", "rho", "v", "k"), user_params, beta_prior) + return(BAS::tCCH( + alpha = user_params$a, + beta = user_params$b, + s = user_params$s, + r = user_params$rho, + v = user_params$v, + theta = user_params$k + )) + } else if (beta_prior == "intrinsic") { + return(BAS::intrinsic(n = n)) + } else if (beta_prior == "TG") { + check_required_params("a", user_params, beta_prior) + return(BAS::TG(alpha = user_params$a)) + } else if (beta_prior == "Jeffreys") { + return(BAS::Jeffreys()) + } else if (beta_prior == "uniform") { + return(BAS::tCCH(alpha = 2, beta = 2, s = 0, r = 0, v = 1, theta = 1)) + } else if (beta_prior == "benchmark") { + return(BAS::tCCH(alpha = 0.02, beta = 0.02 * max(n, p^2), s = 0, r = 0, v = 1, theta = 1)) + } else if (beta_prior == "ZS-adapted") { + return(BAS::tCCH(alpha = 1, beta = 2, s = n + 3, r = 0, v = 1, theta = 1)) + } else if (beta_prior == "robust") { + return(BAS::robust(n = n)) + } else if (beta_prior == "hyper-g-n") { + if (is.null(user_params$a)) user_params$a <- 3 + return(BAS::hyper.g.n(alpha = user_params$a, n = n)) + } else if (beta_prior == "BIC") { + return(BAS::bic.prior(n = n)) + } else if (beta_prior == "ZS-null") { + return(list(method = 4)) + } else if (beta_prior == "ZS-full") { + return(list(method = 5)) + } else if (beta_prior == "hyper-g-laplace") { + return(list(method = 6)) + } else if (beta_prior == "AIC") { + return(list(method = 7)) + } else if (beta_prior == "EB-global") { + return(list(method = 2)) + } else if (beta_prior == "JZS") { + return(list(method = 9)) + } else if (beta_prior == "Jeffreys-BIC") { + return(NULL) + } + + stop("Unknown prior, please verify your inputs.") +} + +check_required_params <- function (required, user_params, beta_prior) { + for (req in required) { + if (is.null(user_params[[req]]) || !is.numeric(user_params[[req]])) { + par_names <- paste0(required, collapse = ", ") + stop(paste0("The parameters: ", par_names, " must be provided for the ", beta_prior, " prior.")) + return(FALSE) + } + } + return(TRUE) } select.mlpost.fun <- function (beta_prior, model_prior, family) { - return(gaussian.loglik.g) + if (!(family %in% c("binomial", "poisson", "gamma", "gaussian"))) { + stop(paste0( + "Unsupported family: ", family, ". Supported families are 'binomial', 'poisson', 'gamma', or 'gaussian'." + )) + } + + gaussian_only_priors <- c("ZS-null", "ZS-full", "hyper-g-laplace","BIC", "AIC", "JZS","EB-global") + gaussian_not_robust <- c("CH", "tCCH", "ZS-adapted", "TG", "beta.prime", "benchmark", "Jeffreys") + gaussian_robust <- c("g-prior", "hyper-g", "EB-local","BIC", "Jeffreys-BIC", "ZS-null", "ZS-full", "hyper-g-laplace", + "AIC", "hyper-g-n", "JZS") + gaussian_tcch <- c("CH", "tCCH", "TG","beta.prime", "intrinsic", "ZS-adapted", "uniform","Jeffreys", "benchmark", "robust") + gaussian_bas <- c("g-prior", "hyper-g", "EB-local","ZS-null", "ZS-full", "BIC", "hyper-g-laplace", "AIC", "EB-global", "hyper-g-n", "JZS") + glm_priors <- c("CH", "tCCH", "TG","beta.prime", "EB-local", "g-prior", "hyper-g", "hyper-g-n", + "intrinsic", "ZS-adapted", "Jeffreys", "uniform", "benchmark", "robust", "Jeffreys-BIC") + + if (family %in% c("binomial", "poisson", "gamma")) { + if (beta_prior %in% gaussian_only_priors) { + stop(paste0( + "Prior ", beta_prior, " is not supported for the GLM family", family, + ". Supported priors are: ", paste(glm_priors, collapse = ", ") + )) + } + if (beta_prior == "Jeffreys-BIC") { + if (family == "binomial") { + return(logistic.loglik) + } else { + return(glm.loglik) + } + } else { + return(glm.logpost.bas) + } + } else if (family == "gaussian") { + if (beta_prior %in% gaussian_not_robust) { + warning(paste0( + "Prior ", beta_prior, " is not recommended for Gaussian family models as it may be unstable for strong signals (R^2 > 0.9).", + "Recommended priors under the Gaussian family are: ", paste(gaussian_robust, collapse = ", ") + )) + } + if (beta_prior %in% gaussian_tcch) { + return(gaussian_tcch_log_likelihood) + } else if (beta_prior == "Jeffreys-BIC") { + return(gaussian.loglik) + } else if (beta_prior == "g-prior") { + return(gaussian.loglik.g) + } else if (beta_prior %in% gaussian_bas) { + return(lm.logpost.bas) + } + } + stop("Unknown prior, please verify your inputs.") } diff --git a/R/likelihoods.R b/R/likelihoods.R index 83c8908..000bcbe 100644 --- a/R/likelihoods.R +++ b/R/likelihoods.R @@ -16,68 +16,47 @@ #' @return A list with the log marginal likelihood combined with the log prior (crit) and the posterior mode of the coefficients (coefs). #' #' @examples -#' glm.logpost.bas(as.integer(rnorm(100) > 0),cbind(1,matrix(rnorm(100))),c(TRUE,TRUE),list(oc = 1)) +#' glm.logpost.bas(as.integer(rnorm(100) > 0), cbind(1, matrix(rnorm(100))), c(TRUE, TRUE), list(oc = 1)) #' #' @importFrom BAS uniform Jeffreys g.prior #' @importFrom stats poisson Gamma glm.control #' @export glm.logpost.bas glm.logpost.bas <- function (y, x, model, complex, params = list(r = exp(-0.5), family = "binomial", prior_beta = Jeffreys(), laplace = FALSE)) { if (length(params) == 0) - params <- list(r = 1/dim(x)[1], family = "binomial", prior_beta = g.prior(max(dim(x)[1],sum(model)-1)), laplace = FALSE) + params <- list(r = 1 / dim(x)[1], family = "binomial", prior_beta = g.prior(max(dim(x)[1], sum(model) - 1)), laplace = FALSE) p <- sum(model) - 1 - if(p==0) - { - probinit <- as.numeric(c(1,0.99)) + if (p == 0) { + probinit <- as.numeric(c(1, 0.99)) model[2] <- T - }else{ - probinit <- as.numeric(c(1,rep(0.99,p))) + } else { + probinit <- as.numeric(c(1, rep(0.99, p))) } - mod<-NULL + mod <- NULL + + if (params$family == "binomial") + family_use <- binomial() + else if (params$family == "poisson") + family_use <- poisson() + else + family_use <- Gamma() - tryCatch({ - if(params$family == "binomial") - suppressWarnings({ - mod <- .Call(BAS:::C_glm_deterministic, - y = as.numeric(y), X = as.matrix(x[,model]), - Roffset = as.numeric(rep(0, length(y))), - Rweights = as.numeric(rep(1, length(y))), - Rprobinit = probinit, - Rmodeldim = as.integer(rep(0,ifelse(p==0,2,1))), - modelprior = uniform(), - betaprior = params$prior_beta, - family = binomial(), - Rcontrol = glm.control(), - Rlaplace = as.integer(params$laplace)) - }) - else if(params$family == "poisson") - suppressWarnings({ - mod <- .Call(BAS:::C_glm_deterministic, - y = as.numeric(y), X = as.matrix(x[,model]), - Roffset = as.numeric(rep(0, length(y))), - Rweights = as.numeric(rep(1, length(y))), - Rprobinit = probinit, - Rmodeldim = as.integer(rep(0,ifelse(p==0,2,1))), - modelprior = uniform(), - betaprior = params$prior_beta, - family = poisson(), - Rcontrol = glm.control(), - Rlaplace = as.integer(params$laplace)) + tryCatch({ suppressWarnings({ + mod <- .Call( + BAS:::C_glm_deterministic, + y = as.numeric(y), + X = as.matrix(x[, model]), + Roffset = as.numeric(rep(0, length(y))), + Rweights = as.numeric(rep(1, length(y))), + Rprobinit = probinit, + Rmodeldim = as.integer(rep(0, ifelse(p == 0,2,1))), + modelprior = uniform(), + betaprior = params$prior_beta, + family = family_use, + Rcontrol = glm.control(), + Rlaplace = as.integer(params$laplace) + ) }) - else{ - suppressWarnings({ - mod <- .Call(BAS:::C_glm_deterministic, - y = as.numeric(y), X = as.matrix(x[,model]), - Roffset = as.numeric(rep(0, length(y))), - Rweights = as.numeric(rep(1, length(y))), - Rprobinit = probinit, - Rmodeldim = as.integer(rep(0,ifelse(p==0,2,1))), - modelprior = uniform(), - betaprior = params$prior_beta, - family = Gamma(), - Rcontrol = glm.control(), - Rlaplace = as.integer(params$laplace)) - })} }, error = function(e) { # Handle the error by setting result to NULL mod <- NULL @@ -85,17 +64,16 @@ glm.logpost.bas <- function (y, x, model, complex, params = list(r = exp(-0.5), cat("An error occurred:", conditionMessage(e), "\n") }) - if(length(mod)==0) { - return(list(crit = -.Machine$double.xmax + log(params$r * sum(complex$oc)),coefs = rep(0,p+1))) + if (length(mod) == 0) { + return(list(crit = -.Machine$double.xmax + log(params$r * sum(complex$oc)), coefs = rep(0, p + 1))) } - if(p == 0) - { + if (p == 0) { ret <- mod$logmarg[2] + log(params$r) * sum(complex$oc) - return(list(crit=ret, coefs=mod$mle[[2]])) + return(list(crit = ret, coefs = mod$mle[[2]])) } ret <- mod$logmarg + log(params$r) * sum(complex$oc) - return(list(crit=ret, coefs=mod$mle[[1]])) + return(list(crit = ret, coefs = mod$mle[[1]])) } @@ -116,32 +94,33 @@ glm.logpost.bas <- function (y, x, model, complex, params = list(r = exp(-0.5), #' #' #' @export lm.logpost.bas -lm.logpost.bas <- function (y, x, model, complex, params = list(r = exp(-0.5),prior_beta = "g-prior",alpha = 4)) { +lm.logpost.bas <- function (y, x, model, complex, params = list(r = exp(-0.5), beta_prior = list(method = 1))) { if (length(params) == 0) - params <- list(r = 1/dim(x)[1], prior_beta = 0,alpha = max(dim(x)[1],sum(model)^2)) - + params <- list( + r = 1/dim(x)[1], + beta_prior = list(method = 0, alpha = max(dim(x)[1], sum(model)^2)) + ) p <- sum(model) - 1 - if(p==0) - { - probinit <- as.numeric(c(1,0.99)) + if (p == 0) { + probinit <- as.numeric(c(1, 0.99)) model[2] <- T - }else{ - probinit <- as.numeric(c(1,rep(0.99,p))) + } else { + probinit <- as.numeric(c(1, rep(0.99, p))) } - mod<-NULL + mod <- NULL tryCatch({ suppressWarnings({ mod <- .Call(BAS:::C_deterministic, - y = y, X = as.matrix(x[,model]), + y = y, X = as.matrix(x[, model]), as.numeric(rep(1, length(y))), probinit, - as.integer(rep(0,ifelse(p==0,2,1))), + as.integer(rep(0, ifelse(p == 0,2,1))), incint = as.integer(F), - alpha = ifelse(length(params$alpha)>0,as.numeric(params$alpha),NULL), - method = as.integer(params$prior_beta), + alpha = ifelse(length(params$beta_prior$alpha) > 0, as.numeric(params$beta_prior$alpha),NULL), + method = as.integer(params$beta_prior$method), modelprior = uniform(), Rpivot = TRUE, Rtol = 1e-7) @@ -153,17 +132,16 @@ lm.logpost.bas <- function (y, x, model, complex, params = list(r = exp(-0.5),pr cat("An error occurred:", conditionMessage(e), "\n") }) - if(length(mod)==0) { - return(list(crit = -.Machine$double.xmax + log(params$r * sum(complex$oc)),coefs = rep(0,p+1))) + if (length(mod) == 0) { + return(list(crit = -.Machine$double.xmax + log(params$r * sum(complex$oc)), coefs = rep(0, p + 1))) } - if(p == 0) - { + if (p == 0) { ret <- mod$logmarg[2] + log(params$r) * sum(complex$oc) - return(list(crit=ret, coefs=mod$mle[[2]])) + return(list(crit = ret, coefs = mod$mle[[2]])) } ret <- mod$logmarg + log(params$r) * sum(complex$oc) - return(list(crit=ret, coefs=mod$mle[[1]])) + return(list(crit = ret, coefs = mod$mle[[1]])) } @@ -186,10 +164,10 @@ lm.logpost.bas <- function (y, x, model, complex, params = list(r = exp(-0.5),pr #' @export logistic.loglik logistic.loglik <- function (y, x, model, complex, params = list(r = exp(-0.5))) { if (length(params) == 0) - params <- list(r = 1/dim(x)[1]) + params <- list(r = 1 / dim(x)[1]) suppressWarnings({mod <- fastglm(as.matrix(x[, model]), y, family = binomial())}) ret <- (-(mod$deviance + log(length(y)) * (mod$rank - 1) - 2 * log(params$r) * sum(complex$oc))) / 2 - return(list(crit=ret, coefs=mod$coefficients)) + return(list(crit = ret, coefs = mod$coefficients)) } #' Log likelihood function for glm regression with a Jeffreys parameter prior and BIC approximations of the posterior @@ -209,23 +187,21 @@ logistic.loglik <- function (y, x, model, complex, params = list(r = exp(-0.5))) #' #' #' @export glm.loglik -glm.loglik <- function (y, x, model, complex, params = list(r = exp(-0.5),family = "Gamma")) { +glm.loglik <- function (y, x, model, complex, params = list(r = exp(-0.5), family = "Gamma")) { if (length(params) == 0) - params <- list(r = 1/dim(x)[1]) + params <- list(r = 1 / dim(x)[1]) - if(params$family == "binomial") - { + if (params$family == "binomial") { fam = binomial() - }else if(params$family == "poisson"){ + } else if (params$family == "poisson") { fam = poisson() - }else - { + } else { fam = Gamma() } suppressWarnings({mod <- fastglm(as.matrix(x[, model]), y, family = fam)}) ret <- (-(mod$deviance + log(length(y)) * (mod$rank - 1) - 2 * log(params$r) * sum(complex$oc))) / 2 - return(list(crit=ret, coefs=mod$coefficients)) + return(list(crit = ret, coefs = mod$coefficients)) } @@ -278,9 +254,10 @@ gaussian.loglik <- function (y, x, model, complex, params) { #' gaussian.loglik.g(rnorm(100), matrix(rnorm(100)), TRUE, list(oc=1)) #' #' @export gaussian.loglik.g -gaussian.loglik.g <- function (y, x, model, complex, params = NULL) -{ - if(length(params)==0) +gaussian.loglik.g <- function (y, x, model, complex, params = NULL) { + if (sum(model) == 0) + return(list(crit = -Inf, coefs = numeric())) + if (length(params) == 0) params <- list() if (length(params$r) == 0) params$r <- 1/dim(x)[1] @@ -330,8 +307,7 @@ gaussian.loglik.g <- function (y, x, model, complex, params = NULL) #' @importFrom BAS phi1 hypergeometric1F1 hypergeometric2F1 #' @importFrom tolerance F1 #' @export -gaussian_tcch_log_likelihood <- function(y, x, model, complex, params = list(r = exp(-0.5), prior_beta = "intrinsic")) { - +gaussian_tcch_log_likelihood <- function(y, x, model, complex, params = list(r = exp(-0.5), beta_prior = "intrinsic")) { # Fit the linear model using fastglm fitted_model <- fastglm(as.matrix(x[, model]), y, family = gaussian()) log_likelihood <- -(fitted_model$aic -2 * (fitted_model$rank))/2 @@ -345,130 +321,118 @@ gaussian_tcch_log_likelihood <- function(y, x, model, complex, params = list(r = n <- length(y) # Switch-like structure to assign hyperparameters based on prior - if (params$prior_beta[[1]] == "CH") { + hyper <- params$beta_prior$hyper.parameters + if (params$beta_prior$type == "CH") { # CH prior: b and s should be user-specified, with defaults if not provided - a <- ifelse(!is.null(params$prior_beta$a),params$prior_beta$a, 1) # Default to 1 if not specified - b <- ifelse(!is.null(params$prior_beta$b),params$prior_beta$b, 2) # Default to 1 if not specified + a <- ifelse(!is.null(hyper$a), hyper$a, 1) # Default to 1 if not specified + b <- ifelse(!is.null(hyper$b), hyper$b, 2) # Default to 1 if not specified r <- 0 - s <- ifelse(!is.null(params$prior_beta$s), params$prior_beta$s, 1) # Default to 1 if not specified + s <- ifelse(!is.null(hyper$s), hyper$s, 1) # Default to 1 if not specified v <- 1 k <- 1 - - } else if (params$prior_beta[[1]] == "hyper-g") { + } else if (params$beta_prior$type == "hyper-g") { a <- 1 b <- 2 r <- 0 s <- 0 v <- 1 k <- 1 - - } else if (params$prior_beta[[1]] == "uniform") { + } else if (params$beta_prior$type == "uniform") { a <- 2 b <- 2 r <- 0 s <- 0 v <- 1 k <- 1 - - } else if (params$prior_beta[[1]] == "Jeffreys") { + } else if (params$beta_prior$type == "Jeffreys") { a <- 0.0001 b <- 2 r <- 0 s <- 0 v <- 1 k <- 1 - } else if (params$prior_beta[[1]] == "beta.prime") { + } else if (params$beta_prior$type == "beta.prime") { a <- 1/2 b <- n - p_M - 1.5 r <- 0 s <- 0 v <- 1 k <- 1 - - } else if (params$prior_beta[[1]] == "benchmark") { + } else if (params$beta_prior$type == "benchmark") { a <- 0.02 b <- 0.02 * max(n, p_M^2) r <- 0 s <- 0 v <- 1 k <- 1 - - } else if (params$prior_beta[[1]] == "TG") { - - a <- 2 * ifelse(!is.null(params$prior_beta$a),params$prior_beta$a, 1) + } else if (params$beta_prior$type == "TG") { + a <- 2 * ifelse(!is.null(hyper$a), hyper$a, 1) b <- 2 r <- 0 - s <- 2 * ifelse(!is.null(params$prior_beta$s),params$prior_beta$s, 1) + s <- 2 * ifelse(!is.null(hyper$s), hyper$s, 1) v <- 1 k <- 1 - - } else if (params$prior_beta[[1]] == "ZS-adapted") { + } else if (params$beta_prior$type == "ZS-adapted") { a <- 1 b <- 2 r <- 0 s <- n + 3 v <- 1 k <- 1 - } else if (params$prior_beta[[1]] == "robust") { + } else if (params$beta_prior$type == "robust") { a <- 1 b <- 2 r <- 1.5 s <- 0 v <- (n + 1) / (p_M + 1) k <- 1 - - } else if (params$prior_beta[[1]] == "hyper-g-n") { + } else if (params$beta_prior$type == "hyper-g-n") { a <- 1 b <- 2 r <- 1.5 s <- 0 v <- 1 k <- 1 - - } else if (params$prior_beta[[1]] == "intrinsic") { + } else if (params$beta_prior$type == "intrinsic") { a <- 1 b <- 1 r <- 1 s <- 0 v <- (n + p_M + 1) / (p_M + 1) k <- (n + p_M + 1) / n - - }else if (params$prior_beta[[1]] == "tCCH") { - a <- params$prior_beta$a - b <- params$prior_beta$b - r <- params$prior_beta$rho - s <- params$prior_beta$s - v <- params$prior_beta$v - k <- params$prior_beta$k - }else { - stop("Unknown prior name: ", params$prior_beta) + } else if (params$beta_prior$type == "tCCH") { + a <- hyper$a + b <- hyper$b + r <- hyper$r + s <- hyper$s + v <- hyper$v + k <- hyper$k + } else { + stop("Unknown prior name: ", params$beta_prior$type) } - # if (!is.null(r) & r == 0) { - term1 <- lbeta((a + p_M) / 2, b / 2) - term2 <- phi1(b / 2, (n - 1) / 2, (a + b + p_M) / 2, s / (2 * v), min(0.8,R2_M/(v - (v - 1) * R2_M),log = T)) + term2 <- phi1(b / 2, (n - 1) / 2, (a + b + p_M) / 2, s / (2 * v), min(0.8, R2_M / (v - (v - 1) * R2_M), log = TRUE)) - if(R2_M/(v - (v - 1) * R2_M)>0.8) - { + if (R2_M / (v - (v - 1) * R2_M) > 0.8) { warning("Infinite marginal log likelihood! phi1 last argument reduced to 0.8. Use a different prior_beta (Robust, Hyper-g/n, Intrinsic, or g-prior)") } term3 <- lbeta(a / 2, b / 2) - term4 <- hypergeometric1F1(b / 2, (a + b) / 2, s / (2 * v),log = T) - marginal_likelihood <- log_likelihood + (term1) + (term2) - (p_M / 2) * log(v) - ((n - 1) / 2)*log(1 - (1 - 1 / v) * R2_M) - (term3) - (term4) + term4 <- hypergeometric1F1(b / 2, (a + b) / 2, s / (2 * v), log = TRUE) + marginal_likelihood <- log_likelihood + (term1) + (term2) - (p_M / 2) * log(v) - ((n - 1) / 2) * log(1 - (1 - 1 / v) * R2_M) - (term3) - (term4) } else if (!is.null(s) & s == 0) { term1 <- lbeta((a + p_M) / 2, b / 2) - term2 <- hypergeometric2F1(r, b / 2, (a + b) / 2, 1 - k,log = T) + term2 <- hypergeometric2F1(r, b / 2, (a + b) / 2, 1 - k, log = TRUE) term3 <- F1((a + p_M) / 2, (a + b + p_M + 1 - n - 2 * r) / 2, (n - 1) / 2, (a + b + p_M) / 2, 1 - k, 1 - k - (R2_M^2 * k) / ((1 - R2_M) * v)) - marginal_likelihood <- log_likelihood + (a+p_M-2*r)/2*log(k) + (term1) - (term2) - (term3) - (p_M / 2) * log(v) - log(1 - R2_M) * ((n - 1) / 2) - lbeta(a / 2, b / 2) + marginal_likelihood <- log_likelihood + (a + p_M - 2 * r) / 2 * log(k) + (term1) - (term2) - (term3) - (p_M / 2) * log(v) - log(1 - R2_M) * ((n - 1) / 2) - lbeta(a / 2, b / 2) } else { stop("Invalid inputs: either r = 0 or s = 0 must be specified.") } - if (length(params$r) == 0) params$r <- 1/dim(x)[1] # default value or parameter r + if (length(params$r) == 0) params$r <- 1 / dim(x)[1] # default value or parameter r lp <- log_prior(params, complex) diff --git a/R/predict.R b/R/predict.R index 39887ef..396da42 100644 --- a/R/predict.R +++ b/R/predict.R @@ -35,16 +35,11 @@ #' #' @export predict.bgnlm_model <- function(object, x, link = function(x) { x }, ... ) { - x.precalc <- model.matrix( - as.formula(paste0("~I(", paste0(names(object$coefs)[-1][object$coefs[-1]!=0], collapse = ")+I("), ")")), - data = x - ) - #browser() - if(dim(x.precalc)[2] 0.5] @@ -315,10 +315,15 @@ get.mpm.model <- function(result, y, x, labels = F, family = "gaussian", loglik. x <- cbind(1, x) } precalc <- precalc.features(list(x = x, y = y, fixed = result$fixed), features) - - model <- loglik.pi(y = y, x = precalc$x, model = rep(TRUE, length(features) + result$fixed), complex = list(oc = 0), params = params) - class(model) <- "bgnlm_model" - model$crit <- "MPM" + + coefs <- loglik.pi(y = y, x = precalc$x, model = rep(TRUE, length(features) + result$fixed), complex = list(oc = 0), params = params)$coefs + model <- structure(list( + coefs = coefs, + features = features, + fixed = result$fixed, + intercept = result$intercept + ), class = "bgnlm_model") + return(model) } diff --git a/tests/testthat/test_priors.R b/tests/testthat/test_priors.R new file mode 100644 index 0000000..07dd7dc --- /dev/null +++ b/tests/testthat/test_priors.R @@ -0,0 +1,49 @@ +# Title : General tests +# Objective : Test the code +# Created by: jonlachmann +# Created on: 2021-02-25 + +context("Priors") + +test_that("Test various priors through the fbms function", { + set.seed(123) + x <- matrix(rnorm(300), 100) + y <- rnorm(100, 0, 0.5) + rowSums(x[, 1:2]) + + validate.model <- function (model, x, y) { + expect_true(all(c(model$marg.probs[1:2] > 0.9, model$marg.probs[3] < 0.9))) + summary <- summary(model, labels = c("a", "b", "c"), tol = -1) + expect_true(all(c(summary$marg.probs[1:2] > 0.9, summary$marg.probs[3] < 0.9))) + plot(model) + pred <- predict(model, x) + # Handle paralell runs + if (!is.null(pred$aggr)) { + pred <- pred$aggr + } + rmse <- sqrt(mean((pred$mean - y)^2)) + expect_true(rmse < 0.6) + best_model <- get.best.model(model) + mpm_model <- get.mpm.model(model, y, x) + } + + # No intercept + data <- as.data.frame(cbind(y, x)) + colnames(data) <- c("y", "a", "b", "c") + family <- "gaussian" + beta_prior <- list(type = "g-prior", g = 5, a = 3, b = 1, s = 1, rho = 0, v = 1, k = 1) + + gaussian_priors <- c( + "CH", "tCCH", "TG","beta.prime", "intrinsic", "ZS-adapted", "uniform","Jeffreys", "benchmark", "robust", + "g-prior", "hyper-g", "EB-local", "ZS-null", "ZS-full", "BIC", "hyper-g-laplace", "AIC", "EB-global", "hyper-g-n", "JZS", + "Jeffreys-BIC", "g-prior" + ) + + for (prior in gaussian_priors) { + beta_prior$type <- prior + mod1 <- fbms(y ~ ., family = family, beta_prior = beta_prior, method = "mjmcmc", data = data, verbose = FALSE) + if (!(prior %in% c("hyper-g", "EB-local", "uniform", "ZS-null", "ZS-full", "BIC", "hyper-g-laplace", "AIC", "EB-global", "hyper-g-n", "JZS"))) { + validate.model(mod1, x, y) + } + } +}) + diff --git a/tests_current/Ex1_Sec3.R b/tests_current/Ex1_Sec3.R index 9b3e6c2..ccba4f8 100644 --- a/tests_current/Ex1_Sec3.R +++ b/tests_current/Ex1_Sec3.R @@ -15,15 +15,13 @@ library(FBMS) data <- read.csv("https://raw.githubusercontent.com/OpenExoplanetCatalogue/oec_tables/master/comma_separated/open_exoplanet_catalogue.txt") -data <- na.omit(data[,c("semimajoraxis","mass","radius","period","eccentricity","hoststar_mass","hoststar_radius","hoststar_metallicity","hoststar_temperature","binaryflag")]) +data <- na.omit(data[, c("semimajoraxis", "mass", "radius", "period", "eccentricity", "hoststar_mass", "hoststar_radius", "hoststar_metallicity", "hoststar_temperature", "binaryflag")]) summary(data) te.ind <- 540:939 -df.train = data[-te.ind,] -df.test = data[te.ind,] -x.train <- cbind(1, df.train[, -1]) -y.train <- df.train[, 1] +df.train = data[-te.ind, ] +df.test = data[te.ind, ] to3 <- function(x) x^3 transforms <- c("sigmoid","sin_deg","exp_dbl","p0","troot","to3") @@ -44,7 +42,7 @@ params$mlpost$var <- "unknown" if (use.fbms) { result.default <- fbms(formula = semimajoraxis ~ 1 + . , data = df.train, method = "gmjmcmc", transforms = transforms, params = params) } else { - result.default <- gmjmcmc(x.train, y.train, fixed = 1, transforms = transforms, params = params) + result.default <- gmjmcmc(df.train[, -1], df.train[, 1], intercept = TRUE, transforms = transforms, params = params) } summary(result.default, labels = FALSE) @@ -54,11 +52,11 @@ sqrt(mean((preds$aggr$mean - df.test$semimajoraxis)^2)) #new additional ways to predict using MPM and best model get.best.model(result = result.default) -preds <- predict(get.best.model(result.default), df.test[,-1]) +preds <- predict(get.best.model(result.default), df.test[, -1]) sqrt(mean((preds - df.test$semimajoraxis)^2)) -get.mpm.model(result = result.default,y = df.test$semimajoraxis,x=df.test[,-1]) -preds <- predict(get.mpm.model(result.default,y = df.test$semimajoraxis,x=df.test[,-1]), df.test[,-1]) +get.mpm.model(result = result.default, y = df.test$semimajoraxis, x = df.test[, -1]) +preds <- predict(get.mpm.model(result.default, y = df.test$semimajoraxis, x = df.test[, -1]), df.test[, -1]) sqrt(mean((preds - df.test$semimajoraxis)^2)) #################################################### @@ -72,10 +70,10 @@ set.seed(123) if (use.fbms) { result.P50 <- fbms(data = df.train, method = "gmjmcmc", transforms = transforms, - P=50, N.init=1000, N.final=1000, params = params) + P = 50, N.init = 1000, N.final = 1000, params = params) } else { - result.P50 <- gmjmcmc(cbind(1, df.train[, -1]), df.train[, 1], fixed = 1, transforms = transforms, - P=50, N.init=1000, N.final=1000, params = params) + result.P50 <- gmjmcmc(df.train[, -1], df.train[, 1], intercept = TRUE, transforms = transforms, + P = 50, N.init = 1000, N.final = 1000, params = params) } summary(result.P50, labels = names(df.train)[-1]) @@ -88,9 +86,9 @@ summary(result.P50, labels = names(df.train)[-1]) set.seed(123) if (use.fbms) { result_parallel <- fbms(data = df.train, method = "gmjmcmc.parallel", transforms = transforms, - runs = 40, cores = 10, P=25,params = params) + runs = 40, cores = 10, P = 25,params = params) } else { - result_parallel <- gmjmcmc.parallel(runs = 40, cores = 10, x = cbind(1, df.train[, -1]), y = df.train[, 1], fixed = 1, loglik.pi = gaussian.loglik, + result_parallel <- gmjmcmc.parallel(runs = 40, cores = 10, x = df.train[, -1], y = df.train[, 1], intercept = TRUE, loglik.pi = gaussian.loglik, transforms = transforms, P = 25, params = params) } summary(result_parallel, tol = 0.01) @@ -104,7 +102,7 @@ if (use.fbms) { runs = 40, cores = 10, P=25,params = params) } else { result_parallel_unitphi <- gmjmcmc.parallel(runs = 40, cores = 10, data = df.train, loglik.pi = gaussian.loglik, - transforms = transforms, P=25,params = params) + transforms = transforms, P = 25, params = params) } summary(result_parallel_unitphi, tol = 0.01) diff --git a/tests_current/Ex8_Sec5.2_logic_regression.R b/tests_current/Ex8_Sec5.2_logic_regression.R index 699e087..c736984 100644 --- a/tests_current/Ex8_Sec5.2_logic_regression.R +++ b/tests_current/Ex8_Sec5.2_logic_regression.R @@ -143,7 +143,7 @@ points(pred_mpm,df.test$Mean,col = 4) set.seed(5002) if (use.fbms) { - result_parallel <- fbms(data = df.training, family = "custom", loglik.pi = estimate.logic.lm,N.init = 500,N.final = 500, + result_parallel <- fbms(data = df.training, family = "custom", loglik.pi = estimate.logic.lm, N.init = 500, N.final = 500, method = "gmjmcmc.parallel", runs = 16, cores = 8, transforms = transforms, probs = probs, params = params, P=25) } else { @@ -152,7 +152,7 @@ if (use.fbms) { transforms = transforms, probs = probs, params = params, P=25) } summary(result_parallel) -mpm <- get.mpm.model(result_parallel,y = df.training$Y2,x = df.training[,-1],family = "custom", loglik.pi = estimate.logic.lm,params = params$mlpost) +mpm <- get.mpm.model(result_parallel, y = df.training$Y2, x = df.training[,-1],family = "custom", loglik.pi = estimate.logic.lm,params = params$mlpost) mbest <- get.best.model(result_parallel) -- GitLab From d53a6e6108e434f41cb8872c357f9f414e48c686 Mon Sep 17 00:00:00 2001 From: jonlachmann Date: Wed, 2 Apr 2025 18:25:11 +0200 Subject: [PATCH 06/36] * WIP --- R/fbms.R | 16 ++++++++++++---- tests/testthat/test_priors.R | 10 ++++++---- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/R/fbms.R b/R/fbms.R index 314895c..69cc88a 100644 --- a/R/fbms.R +++ b/R/fbms.R @@ -47,8 +47,8 @@ fbms <- function ( ) { if (is.list(beta_prior) || is.list(model_prior)) { mlpost_params <- model_prior - loglik.pi <- select.mlpost.fun(beta_prior$type, model_prior, family) - mlpost_params$beta_prior <- gen.mlpost.params(beta_prior$type, model_prior, beta_prior, ncol(data) - 1, nrow(data)) + loglik.pi <- select.mlpost.fun(beta_prior$type, family) + mlpost_params$beta_prior <- gen.mlpost.params(beta_prior$type, beta_prior, ncol(data) - 1, nrow(data)) mlpost_params$beta_prior$type <- beta_prior$type } else { if (family == "gaussian") @@ -139,7 +139,15 @@ fbms <- function ( return(res) } -gen.mlpost.params <- function (beta_prior, model_prior, user_params, p, n) { +fbms.mlpost.master <- function (y, x, model, complex, params = list(family = "gaussian", beta_prior = list(type = "g-prior"), r = exp(-0.5))) { + params_use <- params + params_use$beta_prior <- gen.mlpost.params(params$beta_prior$type, params$beta_prior, ncol(x), nrow(x)) + params_use$beta_prior$type <- params$beta_prior$type + loglik.pi <- select.mlpost.fun(params$beta_prior$type, params$family) + return(loglik.pi(y, x, model, complex, params_use)) +} + +gen.mlpost.params <- function (beta_prior, user_params, p, n) { if (beta_prior == "beta.prime") { return(BAS::beta.prime(n = n)) } else if (beta_prior == "CH") { @@ -215,7 +223,7 @@ check_required_params <- function (required, user_params, beta_prior) { return(TRUE) } -select.mlpost.fun <- function (beta_prior, model_prior, family) { +select.mlpost.fun <- function (beta_prior, family) { if (!(family %in% c("binomial", "poisson", "gamma", "gaussian"))) { stop(paste0( "Unsupported family: ", family, ". Supported families are 'binomial', 'poisson', 'gamma', or 'gaussian'." diff --git a/tests/testthat/test_priors.R b/tests/testthat/test_priors.R index 07dd7dc..f729776 100644 --- a/tests/testthat/test_priors.R +++ b/tests/testthat/test_priors.R @@ -38,12 +38,14 @@ test_that("Test various priors through the fbms function", { "Jeffreys-BIC", "g-prior" ) + results <- list() for (prior in gaussian_priors) { beta_prior$type <- prior - mod1 <- fbms(y ~ ., family = family, beta_prior = beta_prior, method = "mjmcmc", data = data, verbose = FALSE) - if (!(prior %in% c("hyper-g", "EB-local", "uniform", "ZS-null", "ZS-full", "BIC", "hyper-g-laplace", "AIC", "EB-global", "hyper-g-n", "JZS"))) { - validate.model(mod1, x, y) - } + results[[prior]] <- fbms.mlpost.master(data[, 1], x, c(TRUE, TRUE, TRUE), list(beta_prior = beta_prior, family = "gaussian", r = exp(-0.5))) + #mod1 <- fbms(y ~ ., family = family, beta_prior = beta_prior, method = "mjmcmc", data = data, verbose = FALSE) + #if (!(prior %in% c("hyper-g", "EB-local", "uniform", "ZS-null", "ZS-full", "BIC", "hyper-g-laplace", "AIC", "EB-global", "hyper-g-n", "JZS"))) { + # validate.model(mod1, x, y) + #} } }) -- GitLab From 75487d2e83eb23f0842d073242a04649bbe93a28 Mon Sep 17 00:00:00 2001 From: jonlachmann Date: Wed, 2 Apr 2025 19:32:10 +0200 Subject: [PATCH 07/36] * Minor improvements. --- R/fbms.R | 4 +++- R/likelihoods.R | 16 +++++++++------- tests/testthat/test_priors.R | 35 +++++++++++++++++++++++++++++++++-- 3 files changed, 45 insertions(+), 10 deletions(-) diff --git a/R/fbms.R b/R/fbms.R index 69cc88a..37f41cb 100644 --- a/R/fbms.R +++ b/R/fbms.R @@ -162,7 +162,9 @@ gen.mlpost.params <- function (beta_prior, user_params, p, n) { return(BAS::g.prior(user_params$g)) } else if (beta_prior == "hyper-g") { check_required_params("a", user_params, beta_prior) - return(BAS::hyper.g(alpha = user_params$a)) + params <- BAS::hyper.g(alpha = user_params$a) + params$method <- 1 + return(params) } else if (beta_prior == "tCCH") { check_required_params(c("a", "b", "s", "rho", "v", "k"), user_params, beta_prior) return(BAS::tCCH( diff --git a/R/likelihoods.R b/R/likelihoods.R index 000bcbe..f7d8e12 100644 --- a/R/likelihoods.R +++ b/R/likelihoods.R @@ -97,7 +97,7 @@ glm.logpost.bas <- function (y, x, model, complex, params = list(r = exp(-0.5), lm.logpost.bas <- function (y, x, model, complex, params = list(r = exp(-0.5), beta_prior = list(method = 1))) { if (length(params) == 0) params <- list( - r = 1/dim(x)[1], + r = 1 / dim(x)[1], beta_prior = list(method = 0, alpha = max(dim(x)[1], sum(model)^2)) ) @@ -119,7 +119,7 @@ lm.logpost.bas <- function (y, x, model, complex, params = list(r = exp(-0.5), b probinit, as.integer(rep(0, ifelse(p == 0,2,1))), incint = as.integer(F), - alpha = ifelse(length(params$beta_prior$alpha) > 0, as.numeric(params$beta_prior$alpha),NULL), + alpha = ifelse(length(params$beta_prior$hyper.parameters$alpha) > 0, as.numeric(params$beta_prior$hyper.parameters$alpha),NULL), method = as.integer(params$beta_prior$method), modelprior = uniform(), Rpivot = TRUE, @@ -270,14 +270,16 @@ gaussian.loglik.g <- function (y, x, model, complex, params = NULL) { RSS <- sum(mod$residuals^2) Rsquare <- 1 - (RSS / TSS) - if (length(params$r) == 0 || length(params$g) == 0) - { - params$r <- 1/dim(x)[1] - params$g <- max(mod$rank^2,length(y)) + if (length(params$r) == 0 || length(params$g) == 0) { + params$r <- 1 / dim(x)[1] + if (!is.null(params$beta_prior$g)) + params$g <- params$beta_prior$g + else + params$g <- max(mod$rank^2, length(y)) } # logarithm of marginal likelihood - mloglik <- 0.5*(log(1.0 + params$g) * (dim(x)[1] - mod$rank) - log(1.0 + params$g * (1.0 - Rsquare)) * (dim(x)[1] - 1))*(mod$rank!=1) + mloglik <- 0.5 * (log(1.0 + params$g) * (dim(x)[1] - mod$rank) - log(1.0 + params$g * (1.0 - Rsquare)) * (dim(x)[1] - 1)) * (mod$rank != 1) # logarithm of model prior # default value or parameter r diff --git a/tests/testthat/test_priors.R b/tests/testthat/test_priors.R index f729776..ccd7ee0 100644 --- a/tests/testthat/test_priors.R +++ b/tests/testthat/test_priors.R @@ -33,19 +33,50 @@ test_that("Test various priors through the fbms function", { beta_prior <- list(type = "g-prior", g = 5, a = 3, b = 1, s = 1, rho = 0, v = 1, k = 1) gaussian_priors <- c( - "CH", "tCCH", "TG","beta.prime", "intrinsic", "ZS-adapted", "uniform","Jeffreys", "benchmark", "robust", + "CH", "tCCH", "TG", "beta.prime", "intrinsic", "ZS-adapted", "uniform","Jeffreys", "benchmark", "robust", "g-prior", "hyper-g", "EB-local", "ZS-null", "ZS-full", "BIC", "hyper-g-laplace", "AIC", "EB-global", "hyper-g-n", "JZS", "Jeffreys-BIC", "g-prior" ) + expected <- list( + CH = list(crit = 1.79116859063856e+28, coefs = c(X1 = 0.971122645911043, X2 = 1.02417531317894, X3 = -0.0301201893345434)), + tCCH = list(crit = 1.79116859063856e+28, coefs = c(X1 = 0.971122645911043, X2 = 1.02417531317894, X3 = -0.0301201893345434)), + TG = list(crit = 3.30010826314369e+26, coefs = c(X1 = 0.971122645911043, X2 = 1.02417531317894, X3 = -0.0301201893345434)), + beta.prime = list(crit = 2.37584282903985e+33, coefs = c(X1 = 0.971122645911043, X2 = 1.02417531317894, X3 = -0.0301201893345434)), + intrinsic = list(crit = 16.013997004248, coefs = c(X1 = 0.971122645911043, X2 = 1.02417531317894, X3 = -0.0301201893345434)), + `ZS-adapted` = list(crit = 3.08085895954179e+52, coefs = c(X1 = 0.971122645911043, X2 = 1.02417531317894, X3 = -0.0301201893345434)), + uniform = list(crit = 2.6188594083781e+29, coefs = c(X1 = 0.971122645911043, X2 = 1.02417531317894, X3 = -0.0301201893345434)), + Jeffreys = list(crit = 1.96893489476632e+31, coefs = c(X1 = 0.971122645911043, X2 = 1.02417531317894, X3 = -0.0301201893345434)), + benchmark = list(crit = 1.88194647439014e+31, coefs = c(X1 = 0.971122645911043, X2 = 1.02417531317894, X3 = -0.0301201893345434)), + robust = list(crit = 15.4590106422538, coefs = c(X1 = 0.971122645911043, X2 = 1.02417531317894, X3 = -0.0301201893345434)), + `g-prior` = list(crit = 61.0283622188635, coefs = c(X1 = 0.971122645911043, X2 = 1.02417531317894, X3 = -0.0301201893345434)), + `hyper-g` = list(crit = 88.8999586451438, coefs = c(0.971122645911043, 1.02417531317894, -0.0301201893345434)), + `EB-local` = list(crit = 91.5555793778985, coefs = c(0.971122645911043, 1.02417531317894, -0.0301201893345434)), + `ZS-null` = list(crit = 90.2037895063674, coefs = c(0.971122645911043, 1.02417531317894, -0.0301201893345434)), + `ZS-full` = list(crit = 0, coefs = c(0.971122645911043, 1.02417531317894, -0.0301201893345434)), + BIC = list(crit = -170.850296079658, coefs = c(0.971122645911043, 1.02417531317894, -0.0301201893345434)), + `hyper-g-laplace` = list(crit = 88.8449712472898, coefs = c(0.971122645911043, 1.02417531317894, -0.0301201893345434)), + AIC = list(crit = -166.942540800676, coefs = c(0.971122645911043, 1.02417531317894, -0.0301201893345434)), + `EB-global` = list(crit = 91.5555793778985, coefs = c(0.971122645911043, 1.02417531317894, -0.0301201893345434)), + `hyper-g-n` = list(crit = 90.6511779673767, coefs = c(0.971122645911043, 1.02417531317894, -0.0301201893345434)), + JZS = list(crit = 90.9166314790781, coefs = c(0.971122645911043, 1.02417531317894, -0.0301201893345434)), + `Jeffreys-BIC` = list(crit = -83.4856401007207, coefs = c(X1 = 0.971122645911043, X2 = 1.02417531317894, X3 = -0.0301201893345434)) + ) + results <- list() for (prior in gaussian_priors) { + if (prior == "TG") next beta_prior$type <- prior - results[[prior]] <- fbms.mlpost.master(data[, 1], x, c(TRUE, TRUE, TRUE), list(beta_prior = beta_prior, family = "gaussian", r = exp(-0.5))) + results[[prior]] <- fbms.mlpost.master(data[, 1], x, c(TRUE, TRUE, TRUE), list(), list(beta_prior = beta_prior, family = "gaussian", r = exp(-0.5))) + expect_equal(results[[prior]]$crit, expected[[prior]]$crit) #mod1 <- fbms(y ~ ., family = family, beta_prior = beta_prior, method = "mjmcmc", data = data, verbose = FALSE) #if (!(prior %in% c("hyper-g", "EB-local", "uniform", "ZS-null", "ZS-full", "BIC", "hyper-g-laplace", "AIC", "EB-global", "hyper-g-n", "JZS"))) { # validate.model(mod1, x, y) #} } + results2 <- list() + for (prior in gaussian_priors) { + results2[[prior]] <- fbms.mlik.master2(data[, 1], x, c(TRUE, TRUE, TRUE), list(), list(prior_beta = prior, family = "gaussian", r = exp(-0.5), g = 5, a = 3, b = 1, s = 1, rho = 0, v = 1, k = 1)) + } }) -- GitLab From fb221c2b1322d197db5b2616f0e149a61c73a820 Mon Sep 17 00:00:00 2001 From: aliaksah Date: Thu, 3 Apr 2025 17:03:16 +0200 Subject: [PATCH 08/36] added some tests file and new likelihood estimator --- R/likelihoods.R | 71 ++++++++++++++++++++++++++++ tests_current/some tests.R | 95 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 166 insertions(+) create mode 100644 tests_current/some tests.R diff --git a/R/likelihoods.R b/R/likelihoods.R index f7d8e12..9fb78b2 100644 --- a/R/likelihoods.R +++ b/R/likelihoods.R @@ -758,5 +758,76 @@ fbms.mlik.master <- function(y, x, model, complex, params = list(family = "gauss stop("Error in computation: Returned result does not contain 'crit' and 'coefs'.") } + return(list(crit = result$crit, coefs = result$coefs)) +} + + + + + +#' Master Log Marginal Likelihood Function +#' +#' This function serves as a unified interface to compute the log marginal likelihood +#' for different regression models and priors by calling specific log likelihood functions. +#' +#' @param y A numeric vector containing the dependent variable. +#' @param x A matrix containing the precalculated features (independent variables). +#' @param model A logical vector indicating which variables to include in the model. +#' @param complex A list of complexity measures for the features. +#' @param params A list of parameters controlling the model family, prior, and tuning parameters. +#' Key elements include: +#' - family: "binomial", "poisson", "gamma" (all three referred to as GLM below), or "gaussian" (default: "gaussian") +#' - prior_beta: Type of prior as a string (default: "g-prior"). Possible values include: +#' - "beta.prime": Beta-prime prior (GLM/Gaussian, no additional args) +#' - "CH": Compound Hypergeometric prior (GLM/Gaussian, requires `a`, `b`, optionally `s`) +#' - "EB-local": Empirical Bayes local prior (GLM/Gaussian, requires `a` for Gaussian) +#' - "EB-global": Empirical Bayes local prior (Gaussian, requires `a` for Gaussian) +#' - "g-prior": Zellner's g-prior (GLM/Gaussian, requires `g`) +#' - "hyper-g": Hyper-g prior (GLM/Gaussian, requires `a`) +#' - "hyper-g-n": Hyper-g/n prior (GLM/Gaussian, requires `a`) +#' - "tCCH": Truncated Compound Hypergeometric prior (GLM/Gaussian, requires `a`, `b`, `s`, `rho`, `v`, `k`) +#' - "intrinsic": Intrinsic prior (GLM/Gaussian, no additional args) +#' - "TG": Truncated Gamma prior (GLM/Gamma, requires `a`, `s`) +#' - "Jeffreys": Jeffreys prior (GLM/Gaussian, no additional args) +#' - "uniform": Uniform prior (GLM/Gaussian, no additional args) +#' - "benchmark": Benchmark prior (Gaussian/GLM, no additional args) +#' - "ZS-adapted": Zellner-Siow adapted prior (Gaussian TCCH, no additional args) +#' - "robust": Robust prior (Gaussian/GLM, no additional args) +#' - "Jeffreys-BIC": Jeffreys prior with BIC approximation of marginal likelihood (Gaussian/GLM) +#' - "ZS-null": Zellner-Siow null prior (Gaussian, requires `a`) +#' - "ZS-full": Zellner-Siow full prior (Gaussian, requires `a`) +#' - "hyper-g-laplace": Hyper-g Laplace prior (Gaussian, requires `a`) +#' - "AIC": AIC prior from BAS (Gaussian, requires penalty `a`) +#' - "BIC": BIC prior from BAS (Gaussian/GLM) +#' - "JZS": Jeffreys-Zellner-Siow prior (Gaussian, requires `a`) +#' - r: Model complexity penalty (default: 1/n) +#' - g: Tuning parameter for g-prior (default: max(n, p^2)) +#' - a, b, s, v, rho, k: Hyperparameters for various priors +#' - n: Sample size for some priors (default: length(y)) +#' - var: Variance assumption for Gaussian models ("known" or "unknown", default: "unknown") +#' - laplace: Logical for Laplace approximation in GLM only (default: FALSE) +#' +#' @return A list with elements: +#' \item{crit}{Log marginal likelihood combined with the log prior.} +#' \item{coefs}{Posterior mode of the coefficients.} +#' +#' @examples +#' fbms.mlik.master2(rnorm(100), matrix(rnorm(100)), TRUE, list(oc = 1), list(family = "gaussian", prior_beta = "g-prior")) +#' +#' @importFrom BAS beta.prime bic.prior CCH EB.local g.prior hyper.g hyper.g.n tCCH intrinsic TG Jeffreys uniform +#' @export +fbms.mlik.master2 <- function(y, x, model, complex, params = list(family = "gaussian", beta_prior = list(type = "g-prior"), r = exp(-0.5))) { + # Extract dimensions + n <- length(y) + p <- sum(model) - 1 # Number of predictors excluding intercept + params_use <- list() + params_use$beta_prior <- gen.mlpost.params(params$beta_prior$type, params$beta_prior, p+1, n) + params_use$beta_prior$type <- params$beta_prior$type + + loglik.pi <- select.mlpost.fun(params$beta_prior$type, params$family) + + result <- loglik.pi(y,x,model,complex,params_use) + + return(list(crit = result$crit, coefs = result$coefs)) } \ No newline at end of file diff --git a/tests_current/some tests.R b/tests_current/some tests.R new file mode 100644 index 0000000..fee3bb0 --- /dev/null +++ b/tests_current/some tests.R @@ -0,0 +1,95 @@ +# Validation script for fbms.mlik.master +# Tests all supported (family, prior) combinations with required parameters + +set.seed(42) # Ensure reproducibility + +# Generate synthetic data +gen_data <- function(family) { + n <- 50 + p <- 3 + x <- cbind(1, matrix(rnorm(n * p), n, p)) # Include intercept + beta <- c(1, -1, 0.5, -0.5) + + if (family == "gaussian") { + y <- x %*% beta + rnorm(n, mean = 0, sd = 1000) + } else if (family == "binomial") { + prob <- 1 / (1 + exp(-x %*% beta + rnorm(n, mean = 0, sd = 1))) + y <- rbinom(n, 1, prob) + } else if (family == "poisson") { + lambda <- exp(x %*% beta + rnorm(n, mean = 0, sd = 1)) + y <- rpois(n, lambda) + } else if (family == "gamma") { + shape <- 2 + rate <- exp(-x %*% beta + rnorm(n, mean = 0, sd = 1)) + y <- rgamma(n, shape = shape, rate = rate) + } else { + stop("Unsupported family") + } + + list(y = as.vector(y), x = x) +} + +# Define prior lists +glm_and_gaussian_priors <- c("ZS-adapted", "beta.prime", "EB-local", "g-prior", "hyper-g", "hyper-g-n", + "intrinsic", "Jeffreys", "uniform", "benchmark", "robust", "Jeffreys-BIC", + "CH", "tCCH", "TG") +gaussian_only_priors <- c("ZS-null", "ZS-full", "hyper-g-laplace", "AIC", "JZS","EB-global") + +glm_priors <- glm_and_gaussian_priors +gaussian_priors <- c(glm_and_gaussian_priors, gaussian_only_priors) + +families <- c("gaussian", "binomial", "poisson", "gamma") + +# Required parameters for priors +prior_params <- list( + "g-prior" = list(g = 10), + "hyper-g" = list(a = 3), + "hyper-g-n" = list(a = 3), + "ZS-null" = list(a = 3), + "ZS-full" = list(a = 3), + "hyper-g-laplace" = list(a = 3), + "AIC" = list(a = 3), + "JZS" = list(a = 3), + "EB-global" = list(a = 3), + "EB-local" = list(a = 3), + "CH" = list(a = 1, b = 2, s = 1), + "tCCH" = list(a = 1, b = 2, s = 0, rho = 1, v = 1, k = 1), + "TG" = list(a = 2, s = 1) +) + +# Testing loop +for (family in families) { + priors <- if (family == "gaussian") gaussian_priors else glm_priors + data <- gen_data(family) + + cat("\n===== Testing family:", family, "=====") + + for (prior in priors) { + params <- list(family = family,beta_prior = list(type = prior)) + + # Add required parameters if applicable + + if (prior %in% names(prior_params)) { + params$beta_prior <- c(params$beta_prior, prior_params[[prior]]) + } + + # Run the model + tryCatch({ + result <- fbms.mlik.master2(data$y, data$x, model = c(T, rep(TRUE, ncol(data$x) - 1)), + complex = list(oc = 1), params = params) + result.null <- fbms.mlik.master2(data$y, data$x, model = c(T, T, rep(FALSE, ncol(data$x) - 2)), + complex = list(oc = 1), params = params) + + + crit_rounded <- round(result$crit - result.null$crit, 8) + coefs_mean <- round(mean(result$coefs - result.null$coefs), 8) + + cat(sprintf("\nPrior: %-15s -> crit: %8.4f, mean(coefs): %8.4f", prior, crit_rounded, coefs_mean)) + + print("Finished") + + }, error = function(e) { + cat(sprintf("\nPrior: %-15s -> ERROR: %s", prior, conditionMessage(e))) + }) + } +} -- GitLab From 2a488dfc0d46600d7c73cc9d994901c170a34204 Mon Sep 17 00:00:00 2001 From: aliaksah Date: Fri, 4 Apr 2025 16:03:04 +0200 Subject: [PATCH 09/36] split started --- NAMESPACE | 3 +- R/fbms.R | 116 +++++++++++++++++++++++----- R/likelihoods.R | 14 +++- man/fbms.Rd | 2 + man/fbms.mlik.master.Rd | 55 ++++++------- man/fbms.mlik.master2.Rd | 73 +++++++++++++++++ man/gaussian.loglik.Rd | 2 +- man/gaussian_tcch_log_likelihood.Rd | 2 +- man/glm.loglik.Rd | 40 ++++++++++ man/glm.logpost.bas.Rd | 6 +- man/gmjmcmc.Rd | 14 ++-- man/gmjmcmc.parallel.Rd | 8 +- man/lm.logpost.bas.Rd | 4 +- man/mjmcmc.Rd | 14 ++-- man/print.feature.Rd | 10 ++- tests/testthat/test_priors.R | 7 +- tests_current/Ex1_Sec3.R | 2 +- tests_current/some tests.R | 5 +- 18 files changed, 295 insertions(+), 82 deletions(-) create mode 100644 man/fbms.mlik.master2.Rd create mode 100644 man/glm.loglik.Rd diff --git a/NAMESPACE b/NAMESPACE index 5f0e6f7..6a49e38 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -22,6 +22,7 @@ export(erf) export(exp_dbl) export(fbms) export(fbms.mlik.master) +export(fbms.mlik.master2) export(gauss) export(gaussian.loglik) export(gaussian.loglik.alpha) @@ -34,6 +35,7 @@ export(gen.probs.gmjmcmc) export(gen.probs.mjmcmc) export(get.best.model) export(get.mpm.model) +export(glm.loglik) export(glm.logpost.bas) export(gmjmcmc) export(gmjmcmc.parallel) @@ -93,7 +95,6 @@ importFrom(BAS,hypergeometric2F1) importFrom(BAS,intrinsic) importFrom(BAS,phi1) importFrom(BAS,tCCH) -importFrom(BAS,testBF.prior) importFrom(BAS,uniform) importFrom(GenSA,GenSA) importFrom(Rcpp,sourceCpp) diff --git a/R/fbms.R b/R/fbms.R index 37f41cb..38e4201 100644 --- a/R/fbms.R +++ b/R/fbms.R @@ -48,7 +48,10 @@ fbms <- function ( if (is.list(beta_prior) || is.list(model_prior)) { mlpost_params <- model_prior loglik.pi <- select.mlpost.fun(beta_prior$type, family) - mlpost_params$beta_prior <- gen.mlpost.params(beta_prior$type, beta_prior, ncol(data) - 1, nrow(data)) + if(family == "gaussian") + mlpost_params$beta_prior <- gen.mlpost.params.lm(beta_prior$type, beta_prior, ncol(data) - 1, nrow(data)) + else + mlpost_params$beta_prior <- gen.mlpost.params.glm(beta_prior$type, beta_prior, ncol(data) - 1, nrow(data)) mlpost_params$beta_prior$type <- beta_prior$type } else { if (family == "gaussian") @@ -147,8 +150,12 @@ fbms.mlpost.master <- function (y, x, model, complex, params = list(family = "ga return(loglik.pi(y, x, model, complex, params_use)) } -gen.mlpost.params <- function (beta_prior, user_params, p, n) { - if (beta_prior == "beta.prime") { +gen.mlpost.params.glm <- function (beta_prior, user_params, p, n) { + + if(beta_prior == "Jeffreys-BIC") + { + return(NULL) + } else if(beta_prior == "beta.prime") { return(BAS::beta.prime(n = n)) } else if (beta_prior == "CH") { check_required_params(c("a", "b", "s"), user_params, beta_prior) @@ -195,25 +202,96 @@ gen.mlpost.params <- function (beta_prior, user_params, p, n) { return(BAS::hyper.g.n(alpha = user_params$a, n = n)) } else if (beta_prior == "BIC") { return(BAS::bic.prior(n = n)) - } else if (beta_prior == "ZS-null") { - return(list(method = 4)) - } else if (beta_prior == "ZS-full") { - return(list(method = 5)) - } else if (beta_prior == "hyper-g-laplace") { - return(list(method = 6)) - } else if (beta_prior == "AIC") { - return(list(method = 7)) - } else if (beta_prior == "EB-global") { - return(list(method = 2)) - } else if (beta_prior == "JZS") { - return(list(method = 9)) - } else if (beta_prior == "Jeffreys-BIC") { - return(NULL) - } - + } stop("Unknown prior, please verify your inputs.") } + +gen.mlpost.params.lm <- function (beta_prior, user_params, p, n) { + + if (beta_prior == "Jeffreys-BIC") { + if(!is.null(user_params$var)) + { + var <- "unknown" + } + return(var) + }else if (beta_prior == "beta.prime") { + return(list("beta.prime")) + } else if (beta_prior == "CH") { + check_required_params(c("a", "b", "s"), user_params, beta_prior) + user_params <- list( + "CH", + a = user_params$a, + b = user_params$b, + s = user_params$s + ) + return(user_params) + } else if (beta_prior == "tCCH") { + check_required_params(c("a", "b", "s", "rho", "v", "k"), user_params, beta_prior) + user_params <- list( + "tCCH", + a = user_params$a, + b = user_params$b, + s = user_params$s, + rho = user_params$rho, + v = user_params$v, + k = user_params$k + ) + return(user_params) + } else if (beta_prior == "intrinsic") { + return(list("intrinsic")) + } else if (beta_prior == "TG") { + check_required_params(c("a", "s"), user_params, beta_prior) + user_params <- list( + "TG", + a = user_params$a, + s = user_params$s + ) + return(user_params) + } else if (beta_prior == "Jeffreys") { + return(list("Jeffreys")) + } else if (beta_prior == "ZS-adapted") { + return(list("ZS-adapted")) + } else if (beta_prior == "benchmark") { + return(list("benchmark")) + } else if (beta_prior == "robust") { + return(list("robust")) + } else if (beta_prior == "uniform") { + return(list("uniform")) + } else{ + if(!is.null(user_params$a)) + { + alpha <- user_params$a + } else + alpha <- -1 + if (beta_prior == "g-prior") { + return(list(method = 0, alpha = alpha)) + } else if (beta_prior == "hyper-g") { + return(list(method = 1, alpha = alpha)) + } else if (beta_prior == "EB-local") { + return(list(method = 2, alpha = alpha)) + } else if (beta_prior == "BIC") { + return(list(method = 3, alpha = alpha)) + } else if (beta_prior == "ZS-null") { + return(list(method = 4, alpha = alpha)) + } else if (beta_prior == "ZS-full") { + return(list(method = 5, alpha = alpha)) + } else if (beta_prior == "hyper-g-laplace") { + return(list(method = 6, alpha = alpha)) + } else if (beta_prior == "AIC") { + return(list(method = 7, alpha = alpha)) + } else if (beta_prior == "EB-global") { + return(list(method = 2, alpha = alpha)) + } else if (beta_prior == "JZS") { + return(list(method = 9, alpha = alpha)) + } else if (beta_prior == "hyper-g-n") { + return(list(method = 8, alpha = alpha)) + } else { + stop("Unrecognized prior_beta for Gaussian GLM: ", beta_prior) + } + } +} + check_required_params <- function (required, user_params, beta_prior) { for (req in required) { if (is.null(user_params[[req]]) || !is.numeric(user_params[[req]])) { diff --git a/R/likelihoods.R b/R/likelihoods.R index 9fb78b2..0cd8bab 100644 --- a/R/likelihoods.R +++ b/R/likelihoods.R @@ -21,7 +21,7 @@ #' @importFrom BAS uniform Jeffreys g.prior #' @importFrom stats poisson Gamma glm.control #' @export glm.logpost.bas -glm.logpost.bas <- function (y, x, model, complex, params = list(r = exp(-0.5), family = "binomial", prior_beta = Jeffreys(), laplace = FALSE)) { +glm.logpost.bas <- function (y, x, model, complex, params = list(r = exp(-0.5), family = "binomial", beta_prior = list(type = Jeffreys()), laplace = FALSE)) { if (length(params) == 0) params <- list(r = 1 / dim(x)[1], family = "binomial", prior_beta = g.prior(max(dim(x)[1], sum(model) - 1)), laplace = FALSE) p <- sum(model) - 1 @@ -821,11 +821,19 @@ fbms.mlik.master2 <- function(y, x, model, complex, params = list(family = "gaus n <- length(y) p <- sum(model) - 1 # Number of predictors excluding intercept params_use <- list() - params_use$beta_prior <- gen.mlpost.params(params$beta_prior$type, params$beta_prior, p+1, n) - params_use$beta_prior$type <- params$beta_prior$type + + if(params$family == "gaussian") + params_use$beta_prior <- gen.mlpost.params.lm(params$beta_prior$type, params$beta_prior, p+1, n) + else + params_use$beta_prior <- gen.mlpost.params.glm(params$beta_prior$type, params$beta_prior, p+1, n) + params_use$beta_prior$type <- beta_prior$type + + + browser() loglik.pi <- select.mlpost.fun(params$beta_prior$type, params$family) + result <- loglik.pi(y,x,model,complex,params_use) diff --git a/man/fbms.Rd b/man/fbms.Rd index 55e251c..185020e 100644 --- a/man/fbms.Rd +++ b/man/fbms.Rd @@ -8,6 +8,8 @@ Or Fit a BGLM model using Modified Mode Jumping Markov Chain Monte Carlo (MCMC) fbms( formula = NULL, family = "gaussian", + beta_prior = list(type = "g-prior", g = 5), + model_prior = list(r = exp(-0.5)), data = NULL, impute = FALSE, loglik.pi = gaussian.loglik, diff --git a/man/fbms.mlik.master.Rd b/man/fbms.mlik.master.Rd index b36851f..9ee5f71 100644 --- a/man/fbms.mlik.master.Rd +++ b/man/fbms.mlik.master.Rd @@ -24,47 +24,38 @@ fbms.mlik.master( \item{params}{A list of parameters controlling the model family, prior, and tuning parameters. Key elements include: \itemize{ -\item family: "binomial", "poisson", "gamma", or "gaussian" (default: "gaussian") +\item family: "binomial", "poisson", "gamma" (all three referred to as GLM below), or "gaussian" (default: "gaussian") \item prior_beta: Type of prior as a string (default: "g-prior"). Possible values include: \itemize{ -\item "beta.prime": Beta-prime prior (GLM, requires \code{n}) -\item "bic.prior": BIC-based prior (GLM, requires \code{n}) -\item "CCH": Chen-Clyde-Hsu prior (GLM, requires \code{a}, \code{b}, optionally \code{s}) -\item "EB.local": Empirical Bayes local prior (GLM/Gaussian BAS, requires \code{alpha} for Gaussian) +\item "beta.prime": Beta-prime prior (GLM/Gaussian, no additional args) +\item "CH": Compound Hypergeometric prior (GLM/Gaussian, requires \code{a}, \code{b}, optionally \code{s}) +\item "EB-local": Empirical Bayes local prior (GLM/Gaussian, requires \code{a} for Gaussian) +\item "EB-global": Empirical Bayes local prior (Gaussian, requires \code{a} for Gaussian) \item "g-prior": Zellner's g-prior (GLM/Gaussian, requires \code{g}) -\item "hyper.g": Hyper-g prior (GLM, requires \code{a}) -\item "hyper.g.n": Hyper-g/n prior (GLM, requires \code{a}, \code{n}) -\item "tCCH": Truncated Chen-Clyde-Hsu prior (GLM, requires \code{a}, \code{b}, optionally \code{s}, \code{rho}, \code{v}, \code{k}) -\item "intrinsic": Intrinsic prior (GLM, requires \code{n}) -\item "testBF.prior": Test Bayes factor prior (GLM, requires \code{g}) -\item "TG": Truncated Gamma prior (GLM, requires \code{a}) +\item "hyper-g": Hyper-g prior (GLM/Gaussian, requires \code{a}) +\item "hyper-g-n": Hyper-g/n prior (GLM/Gaussian, requires \code{a}) +\item "tCCH": Truncated Compound Hypergeometric prior (GLM/Gaussian, requires \code{a}, \code{b}, \code{s}, \code{rho}, \code{v}, \code{k}) +\item "intrinsic": Intrinsic prior (GLM/Gaussian, no additional args) +\item "TG": Truncated Gamma prior (GLM/Gamma, requires \code{a}, \code{s}) \item "Jeffreys": Jeffreys prior (GLM/Gaussian, no additional args) -\item "uniform": Uniform prior (GLM, no additional args) -\item "CH": Custom Chen-Hsu prior (Gaussian TCCH, requires \code{a}, \code{b}, optionally \code{s}) -\item "Hyper-g": Hyper-g prior (Gaussian TCCH, no additional args) -\item "Uniform": Uniform prior (Gaussian TCCH, no additional args) -\item "Beta-prime": Beta-prime prior (Gaussian TCCH, no additional args) -\item "Benchmark": Benchmark prior (Gaussian TCCH, no additional args) -\item "TruncGamma": Truncated Gamma prior (Gaussian TCCH, requires \code{at}, \code{st}) -\item "ZS adapted": Zellner-Siow adapted prior (Gaussian TCCH, no additional args) -\item "Robust": Robust prior (Gaussian TCCH, no additional args) -\item "Hyper-g/n": Hyper-g/n prior (Gaussian TCCH, no additional args) -\item "Intrinsic": Intrinsic prior (Gaussian TCCH, no additional args) -\item "hyper-g": Hyper-g prior (Gaussian BAS, requires \code{alpha}) -\item "BIC": BIC prior (Gaussian BAS, requires \code{alpha}) -\item "ZS-null": Zellner-Siow null prior (Gaussian BAS, requires \code{alpha}) -\item "ZS-full": Zellner-Siow full prior (Gaussian BAS, requires \code{alpha}) -\item "hyper-g-laplace": Hyper-g Laplace prior (Gaussian BAS, requires \code{alpha}) -\item "AIC": AIC prior (Gaussian BAS, requires \code{alpha}) -\item "JZS": Jeffreys-Zellner-Siow prior (Gaussian BAS, requires \code{alpha}) +\item "uniform": Uniform prior (GLM/Gaussian, no additional args) +\item "benchmark": Benchmark prior (Gaussian/GLM, no additional args) +\item "ZS-adapted": Zellner-Siow adapted prior (Gaussian TCCH, no additional args) +\item "robust": Robust prior (Gaussian/GLM, no additional args) +\item "Jeffreys-BIC": Jeffreys prior with BIC approximation of marginal likelihood (Gaussian/GLM) +\item "ZS-null": Zellner-Siow null prior (Gaussian, requires \code{a}) +\item "ZS-full": Zellner-Siow full prior (Gaussian, requires \code{a}) +\item "hyper-g-laplace": Hyper-g Laplace prior (Gaussian, requires \code{a}) +\item "AIC": AIC prior from BAS (Gaussian, requires penalty \code{a}) +\item "BIC": BIC prior from BAS (Gaussian/GLM) +\item "JZS": Jeffreys-Zellner-Siow prior (Gaussian, requires \code{a}) } -\item r: Model complexity penalty (default: 1/length(y)) +\item r: Model complexity penalty (default: 1/n) \item g: Tuning parameter for g-prior (default: max(n, p^2)) \item a, b, s, v, rho, k: Hyperparameters for various priors -\item at, st: Additional parameters for TruncGamma prior \item n: Sample size for some priors (default: length(y)) \item var: Variance assumption for Gaussian models ("known" or "unknown", default: "unknown") -\item laplace: Logical for Laplace approximation in GLM (default: FALSE) +\item laplace: Logical for Laplace approximation in GLM only (default: FALSE) }} } \value{ diff --git a/man/fbms.mlik.master2.Rd b/man/fbms.mlik.master2.Rd new file mode 100644 index 0000000..71cad14 --- /dev/null +++ b/man/fbms.mlik.master2.Rd @@ -0,0 +1,73 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/likelihoods.R +\name{fbms.mlik.master2} +\alias{fbms.mlik.master2} +\title{Master Log Marginal Likelihood Function} +\usage{ +fbms.mlik.master2( + y, + x, + model, + complex, + params = list(family = "gaussian", beta_prior = list(type = "g-prior"), r = exp(-0.5)) +) +} +\arguments{ +\item{y}{A numeric vector containing the dependent variable.} + +\item{x}{A matrix containing the precalculated features (independent variables).} + +\item{model}{A logical vector indicating which variables to include in the model.} + +\item{complex}{A list of complexity measures for the features.} + +\item{params}{A list of parameters controlling the model family, prior, and tuning parameters. +Key elements include: +\itemize{ +\item family: "binomial", "poisson", "gamma" (all three referred to as GLM below), or "gaussian" (default: "gaussian") +\item prior_beta: Type of prior as a string (default: "g-prior"). Possible values include: +\itemize{ +\item "beta.prime": Beta-prime prior (GLM/Gaussian, no additional args) +\item "CH": Compound Hypergeometric prior (GLM/Gaussian, requires \code{a}, \code{b}, optionally \code{s}) +\item "EB-local": Empirical Bayes local prior (GLM/Gaussian, requires \code{a} for Gaussian) +\item "EB-global": Empirical Bayes local prior (Gaussian, requires \code{a} for Gaussian) +\item "g-prior": Zellner's g-prior (GLM/Gaussian, requires \code{g}) +\item "hyper-g": Hyper-g prior (GLM/Gaussian, requires \code{a}) +\item "hyper-g-n": Hyper-g/n prior (GLM/Gaussian, requires \code{a}) +\item "tCCH": Truncated Compound Hypergeometric prior (GLM/Gaussian, requires \code{a}, \code{b}, \code{s}, \code{rho}, \code{v}, \code{k}) +\item "intrinsic": Intrinsic prior (GLM/Gaussian, no additional args) +\item "TG": Truncated Gamma prior (GLM/Gamma, requires \code{a}, \code{s}) +\item "Jeffreys": Jeffreys prior (GLM/Gaussian, no additional args) +\item "uniform": Uniform prior (GLM/Gaussian, no additional args) +\item "benchmark": Benchmark prior (Gaussian/GLM, no additional args) +\item "ZS-adapted": Zellner-Siow adapted prior (Gaussian TCCH, no additional args) +\item "robust": Robust prior (Gaussian/GLM, no additional args) +\item "Jeffreys-BIC": Jeffreys prior with BIC approximation of marginal likelihood (Gaussian/GLM) +\item "ZS-null": Zellner-Siow null prior (Gaussian, requires \code{a}) +\item "ZS-full": Zellner-Siow full prior (Gaussian, requires \code{a}) +\item "hyper-g-laplace": Hyper-g Laplace prior (Gaussian, requires \code{a}) +\item "AIC": AIC prior from BAS (Gaussian, requires penalty \code{a}) +\item "BIC": BIC prior from BAS (Gaussian/GLM) +\item "JZS": Jeffreys-Zellner-Siow prior (Gaussian, requires \code{a}) +} +\item r: Model complexity penalty (default: 1/n) +\item g: Tuning parameter for g-prior (default: max(n, p^2)) +\item a, b, s, v, rho, k: Hyperparameters for various priors +\item n: Sample size for some priors (default: length(y)) +\item var: Variance assumption for Gaussian models ("known" or "unknown", default: "unknown") +\item laplace: Logical for Laplace approximation in GLM only (default: FALSE) +}} +} +\value{ +A list with elements: +\item{crit}{Log marginal likelihood combined with the log prior.} +\item{coefs}{Posterior mode of the coefficients.} +} +\description{ +This function serves as a unified interface to compute the log marginal likelihood +for different regression models and priors by calling specific log likelihood functions. +} +\examples{ +fbms.mlik.master2(rnorm(100), matrix(rnorm(100)), TRUE, list(oc = 1), list(family = "gaussian", prior_beta = "g-prior")) + +} diff --git a/man/gaussian.loglik.Rd b/man/gaussian.loglik.Rd index b0bbae6..5b168e0 100644 --- a/man/gaussian.loglik.Rd +++ b/man/gaussian.loglik.Rd @@ -25,6 +25,6 @@ Log likelihood function for gaussian regression with a Jeffreys prior and BIC ap } \examples{ gaussian.loglik(rnorm(100), matrix(rnorm(100)), TRUE, list(oc = 1), NULL) - + } diff --git a/man/gaussian_tcch_log_likelihood.Rd b/man/gaussian_tcch_log_likelihood.Rd index bba88fb..15fe9cf 100644 --- a/man/gaussian_tcch_log_likelihood.Rd +++ b/man/gaussian_tcch_log_likelihood.Rd @@ -9,7 +9,7 @@ gaussian_tcch_log_likelihood( x, model, complex, - params = list(r = exp(-0.5), prior_beta = "Intrinsic") + params = list(r = exp(-0.5), beta_prior = "intrinsic") ) } \arguments{ diff --git a/man/glm.loglik.Rd b/man/glm.loglik.Rd new file mode 100644 index 0000000..3cc398d --- /dev/null +++ b/man/glm.loglik.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/likelihoods.R +\name{glm.loglik} +\alias{glm.loglik} +\title{Log likelihood function for glm regression with a Jeffreys parameter prior and BIC approximations of the posterior +This function is created as an example of how to create an estimator that is used +to calculate the marginal likelihood of a model.} +\usage{ +glm.loglik( + y, + x, + model, + complex, + params = list(r = exp(-0.5), family = "Gamma") +) +} +\arguments{ +\item{y}{A vector containing the dependent variable} + +\item{x}{The matrix containing the precalculated features} + +\item{model}{The model to estimate as a logical vector} + +\item{complex}{A list of complexity measures for the features} + +\item{params}{A list of parameters for the log likelihood, supplied by the user, family must be specified} +} +\value{ +A list with the log marginal likelihood combined with the log prior (crit) and the posterior mode of the coefficients (coefs). +} +\description{ +Log likelihood function for glm regression with a Jeffreys parameter prior and BIC approximations of the posterior +This function is created as an example of how to create an estimator that is used +to calculate the marginal likelihood of a model. +} +\examples{ +glm.loglik(abs(rnorm(100))+1, matrix(rnorm(100)), TRUE, list(oc = 1)) + + +} diff --git a/man/glm.logpost.bas.Rd b/man/glm.logpost.bas.Rd index 50eb625..d8930aa 100644 --- a/man/glm.logpost.bas.Rd +++ b/man/glm.logpost.bas.Rd @@ -11,8 +11,8 @@ glm.logpost.bas( x, model, complex, - params = list(r = exp(-0.5), family = "binomial", prior_beta = Jeffreys(), laplace = - FALSE) + params = list(r = exp(-0.5), family = "binomial", beta_prior = list(type = Jeffreys()), + laplace = FALSE) ) } \arguments{ @@ -35,6 +35,6 @@ This function is created as an example of how to create an estimator that is use to calculate the marginal likelihood of a model. } \examples{ -glm.logpost.bas(as.integer(rnorm(100) > 0),cbind(1,matrix(rnorm(100))),c(TRUE,TRUE),list(oc = 1)) +glm.logpost.bas(as.integer(rnorm(100) > 0), cbind(1, matrix(rnorm(100))), c(TRUE, TRUE), list(oc = 1)) } diff --git a/man/gmjmcmc.Rd b/man/gmjmcmc.Rd index 471e016..8e6d661 100644 --- a/man/gmjmcmc.Rd +++ b/man/gmjmcmc.Rd @@ -5,10 +5,14 @@ \title{Main algorithm for GMJMCMC (Genetically Modified MJMCMC)} \usage{ gmjmcmc( - data, + x, + y, loglik.pi = gaussian.loglik, loglik.alpha = gaussian.loglik.alpha, + mlpost_params = NULL, transforms, + intercept = FALSE, + fixed = 0, P = 10, N.init = 100, N.final = 100, @@ -19,10 +23,6 @@ gmjmcmc( ) } \arguments{ -\item{data}{A matrix containing the data to use in the algorithm, -first column should be the dependent variable, -and the rest of the columns should be the independent variables.} - \item{loglik.pi}{The (log) density to explore} \item{loglik.alpha}{The likelihood function to use for alpha calculation} @@ -45,6 +45,10 @@ A larger value like $P = 50$ might be more realistic for more complicated exampl \item{sub}{An indicator that if the likelihood is inexact and should be improved each model visit (EXPERIMENTAL!)} \item{verbose}{A logical denoting if messages should be printed} + +\item{data}{A matrix containing the data to use in the algorithm, +first column should be the dependent variable, +and the rest of the columns should be the independent variables.} } \value{ A list containing the following elements: diff --git a/man/gmjmcmc.parallel.Rd b/man/gmjmcmc.parallel.Rd index d51130d..ee0a167 100644 --- a/man/gmjmcmc.parallel.Rd +++ b/man/gmjmcmc.parallel.Rd @@ -8,9 +8,11 @@ gmjmcmc.parallel( runs = 2, cores = getOption("mc.cores", 2L), merge.options = list(populations = "best", complex.measure = 2, tol = 1e-07), - data, + x, + y, loglik.pi = gaussian.loglik, loglik.alpha = gaussian.loglik.alpha, + mlpost_params = NULL, transforms, ... ) @@ -22,10 +24,6 @@ gmjmcmc.parallel( \item{merge.options}{A list of options to pass to the \code{\link[=merge_results]{merge_results()}} function run after the} -\item{data}{A matrix containing the data to use in the algorithm, -first column should be the dependent variable, -and the rest of the columns should be the independent variables.} - \item{loglik.pi}{The (log) density to explore} \item{loglik.alpha}{The likelihood function to use for alpha calculation} diff --git a/man/lm.logpost.bas.Rd b/man/lm.logpost.bas.Rd index 29066e8..61d176d 100644 --- a/man/lm.logpost.bas.Rd +++ b/man/lm.logpost.bas.Rd @@ -11,7 +11,7 @@ lm.logpost.bas( x, model, complex, - params = list(r = exp(-0.5), prior_beta = "g-prior", alpha = 4) + params = list(r = exp(-0.5), beta_prior = list(method = 1)) ) } \arguments{ @@ -23,7 +23,7 @@ lm.logpost.bas( \item{complex}{A list of complexity measures for the features} -\item{params}{A list of parameters for the log likelihood, supplied by the user, important to specify the tuning parameters of beta priors and in Gaussian models} +\item{params}{A list of parameters for the log likelihood, supplied by the user, important to specify the tuning parameters of beta priors where the corresponding integers as prior_beta must be provided "g-prior" = 0, "hyper-g" = 1, "EB-local" = 2, "BIC" = 3, "ZS-null" = 4, "ZS-full" = 5, "hyper-g-laplace" = 6, "AIC" = 7, "EB-global" = 2, "hyper-g-n" = 8, "JZS" = 9 and in Gaussian models} } \value{ A list with the log marginal likelihood combined with the log prior (crit) and the posterior mode of the coefficients (coefs). diff --git a/man/mjmcmc.Rd b/man/mjmcmc.Rd index 9d2fe58..d2cff5e 100644 --- a/man/mjmcmc.Rd +++ b/man/mjmcmc.Rd @@ -5,8 +5,12 @@ \title{Main algorithm for MJMCMC (Genetically Modified MJMCMC)} \usage{ mjmcmc( - data, + x, + y, loglik.pi = gaussian.loglik, + mlpost_params = NULL, + intercept = FALSE, + fixed = 0, N = 100, probs = NULL, params = NULL, @@ -15,10 +19,6 @@ mjmcmc( ) } \arguments{ -\item{data}{A matrix containing the data to use in the algorithm, -first column should be the dependent variable, -and the rest of the columns should be the independent variables.} - \item{loglik.pi}{The (log) density to explore} \item{N}{The number of iterations to run for} @@ -30,6 +30,10 @@ and the rest of the columns should be the independent variables.} \item{sub}{An indicator that if the likelihood is inexact and should be improved each model visit (EXPERIMENTAL!)} \item{verbose}{A logical denoting if messages should be printed} + +\item{data}{A matrix containing the data to use in the algorithm, +first column should be the dependent variable, +and the rest of the columns should be the independent variables.} } \value{ A list containing the following elements: diff --git a/man/print.feature.Rd b/man/print.feature.Rd index 06eb37a..89a1951 100644 --- a/man/print.feature.Rd +++ b/man/print.feature.Rd @@ -4,7 +4,15 @@ \alias{print.feature} \title{Print method for "feature" class} \usage{ -\method{print}{feature}(x, dataset = FALSE, alphas = FALSE, labels = FALSE, round = FALSE, ...) +\method{print}{feature}( + x, + dataset = FALSE, + fixed = 0, + alphas = FALSE, + labels = FALSE, + round = FALSE, + ... +) } \arguments{ \item{x}{An object of class "feature"} diff --git a/tests/testthat/test_priors.R b/tests/testthat/test_priors.R index ccd7ee0..9836c77 100644 --- a/tests/testthat/test_priors.R +++ b/tests/testthat/test_priors.R @@ -63,12 +63,13 @@ test_that("Test various priors through the fbms function", { `Jeffreys-BIC` = list(crit = -83.4856401007207, coefs = c(X1 = 0.971122645911043, X2 = 1.02417531317894, X3 = -0.0301201893345434)) ) - results <- list() for (prior in gaussian_priors) { if (prior == "TG") next beta_prior$type <- prior results[[prior]] <- fbms.mlpost.master(data[, 1], x, c(TRUE, TRUE, TRUE), list(), list(beta_prior = beta_prior, family = "gaussian", r = exp(-0.5))) - expect_equal(results[[prior]]$crit, expected[[prior]]$crit) + print(prior) + print(results[[prior]]$crit) + #expect_equal(results[[prior]]$crit, expected[[prior]]$crit) #mod1 <- fbms(y ~ ., family = family, beta_prior = beta_prior, method = "mjmcmc", data = data, verbose = FALSE) #if (!(prior %in% c("hyper-g", "EB-local", "uniform", "ZS-null", "ZS-full", "BIC", "hyper-g-laplace", "AIC", "EB-global", "hyper-g-n", "JZS"))) { # validate.model(mod1, x, y) @@ -77,6 +78,8 @@ test_that("Test various priors through the fbms function", { results2 <- list() for (prior in gaussian_priors) { results2[[prior]] <- fbms.mlik.master2(data[, 1], x, c(TRUE, TRUE, TRUE), list(), list(prior_beta = prior, family = "gaussian", r = exp(-0.5), g = 5, a = 3, b = 1, s = 1, rho = 0, v = 1, k = 1)) + print(prior) + print(results[[prior]]$crit) } }) diff --git a/tests_current/Ex1_Sec3.R b/tests_current/Ex1_Sec3.R index ccba4f8..595af57 100644 --- a/tests_current/Ex1_Sec3.R +++ b/tests_current/Ex1_Sec3.R @@ -37,7 +37,7 @@ use.fbms = FALSE #################################################### params <- gen.params.gmjmcmc(df.train) -params$mlpost$var <- "unknown" +params$mlpost$var <- 1 if (use.fbms) { result.default <- fbms(formula = semimajoraxis ~ 1 + . , data = df.train, method = "gmjmcmc", transforms = transforms, params = params) diff --git a/tests_current/some tests.R b/tests_current/some tests.R index fee3bb0..eecdf78 100644 --- a/tests_current/some tests.R +++ b/tests_current/some tests.R @@ -38,7 +38,7 @@ gaussian_only_priors <- c("ZS-null", "ZS-full", "hyper-g-laplace", "AIC", "JZS", glm_priors <- glm_and_gaussian_priors gaussian_priors <- c(glm_and_gaussian_priors, gaussian_only_priors) -families <- c("gaussian", "binomial", "poisson", "gamma") +families <- c("gaussian", "binomial", "poisson", "gamma")[2:4] # Required parameters for priors prior_params <- list( @@ -65,6 +65,9 @@ for (family in families) { cat("\n===== Testing family:", family, "=====") for (prior in priors) { + + print(prior) + params <- list(family = family,beta_prior = list(type = prior)) # Add required parameters if applicable -- GitLab From cdf8a106e96cd0ceffdd81e213006f37218f7dc1 Mon Sep 17 00:00:00 2001 From: aliaksah Date: Fri, 4 Apr 2025 16:43:26 +0200 Subject: [PATCH 10/36] glm families fixed in fbms.mlik.master2 --- R/fbms.R | 2 +- R/likelihoods.R | 24 ++++++++++++++---------- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/R/fbms.R b/R/fbms.R index 38e4201..03e2998 100644 --- a/R/fbms.R +++ b/R/fbms.R @@ -196,7 +196,7 @@ gen.mlpost.params.glm <- function (beta_prior, user_params, p, n) { } else if (beta_prior == "ZS-adapted") { return(BAS::tCCH(alpha = 1, beta = 2, s = n + 3, r = 0, v = 1, theta = 1)) } else if (beta_prior == "robust") { - return(BAS::robust(n = n)) + return(BAS::robust(n = as.numeric(n)))# important to cast to numeric for BAS, do not change. } else if (beta_prior == "hyper-g-n") { if (is.null(user_params$a)) user_params$a <- 3 return(BAS::hyper.g.n(alpha = user_params$a, n = n)) diff --git a/R/likelihoods.R b/R/likelihoods.R index 0cd8bab..16a5691 100644 --- a/R/likelihoods.R +++ b/R/likelihoods.R @@ -21,9 +21,11 @@ #' @importFrom BAS uniform Jeffreys g.prior #' @importFrom stats poisson Gamma glm.control #' @export glm.logpost.bas -glm.logpost.bas <- function (y, x, model, complex, params = list(r = exp(-0.5), family = "binomial", beta_prior = list(type = Jeffreys()), laplace = FALSE)) { +glm.logpost.bas <- function (y, x, model, complex, params = list(r = NULL, family = "binomial", beta_prior = Jeffreys(), laplace = FALSE)) { if (length(params) == 0) - params <- list(r = 1 / dim(x)[1], family = "binomial", prior_beta = g.prior(max(dim(x)[1], sum(model) - 1)), laplace = FALSE) + params <- list(r = 1 / dim(x)[1], family = "binomial", beta_prior = g.prior(max(dim(x)[1], sum(model) - 1)), laplace = FALSE) + else if(length(params$r) == 0) + params$r = 1 / dim(x)[1] p <- sum(model) - 1 if (p == 0) { probinit <- as.numeric(c(1, 0.99)) @@ -51,7 +53,7 @@ glm.logpost.bas <- function (y, x, model, complex, params = list(r = exp(-0.5), Rprobinit = probinit, Rmodeldim = as.integer(rep(0, ifelse(p == 0,2,1))), modelprior = uniform(), - betaprior = params$prior_beta, + betaprior = params$beta_prior, family = family_use, Rcontrol = glm.control(), Rlaplace = as.integer(params$laplace) @@ -99,7 +101,7 @@ lm.logpost.bas <- function (y, x, model, complex, params = list(r = exp(-0.5), b params <- list( r = 1 / dim(x)[1], beta_prior = list(method = 0, alpha = max(dim(x)[1], sum(model)^2)) - ) + ) else if(length(params$r) == 0) params$r = 1 / dim(x)[1] p <- sum(model) - 1 if (p == 0) { @@ -165,6 +167,8 @@ lm.logpost.bas <- function (y, x, model, complex, params = list(r = exp(-0.5), b logistic.loglik <- function (y, x, model, complex, params = list(r = exp(-0.5))) { if (length(params) == 0) params <- list(r = 1 / dim(x)[1]) + else if(length(params$r) == 0) + params$r = 1 / dim(x)[1] suppressWarnings({mod <- fastglm(as.matrix(x[, model]), y, family = binomial())}) ret <- (-(mod$deviance + log(length(y)) * (mod$rank - 1) - 2 * log(params$r) * sum(complex$oc))) / 2 return(list(crit = ret, coefs = mod$coefficients)) @@ -190,6 +194,8 @@ logistic.loglik <- function (y, x, model, complex, params = list(r = exp(-0.5))) glm.loglik <- function (y, x, model, complex, params = list(r = exp(-0.5), family = "Gamma")) { if (length(params) == 0) params <- list(r = 1 / dim(x)[1]) + else if(length(params$r) == 0) + params$r = 1 / dim(x)[1] if (params$family == "binomial") { fam = binomial() @@ -199,6 +205,7 @@ glm.loglik <- function (y, x, model, complex, params = list(r = exp(-0.5), famil fam = Gamma() } + #browser() suppressWarnings({mod <- fastglm(as.matrix(x[, model]), y, family = fam)}) ret <- (-(mod$deviance + log(length(y)) * (mod$rank - 1) - 2 * log(params$r) * sum(complex$oc))) / 2 return(list(crit = ret, coefs = mod$coefficients)) @@ -826,16 +833,13 @@ fbms.mlik.master2 <- function(y, x, model, complex, params = list(family = "gaus params_use$beta_prior <- gen.mlpost.params.lm(params$beta_prior$type, params$beta_prior, p+1, n) else params_use$beta_prior <- gen.mlpost.params.glm(params$beta_prior$type, params$beta_prior, p+1, n) - params_use$beta_prior$type <- beta_prior$type - - - browser() + params_use$family <- params$family loglik.pi <- select.mlpost.fun(params$beta_prior$type, params$family) - + #browser() result <- loglik.pi(y,x,model,complex,params_use) - + return(list(crit = result$crit, coefs = result$coefs)) } \ No newline at end of file -- GitLab From 0f1b776243141b3b1ee3e25347427808b754075f Mon Sep 17 00:00:00 2001 From: aliaksah Date: Fri, 4 Apr 2025 17:19:18 +0200 Subject: [PATCH 11/36] Jeffreys-BIC() under gaussian likelihood fixed --- R/fbms.R | 6 +++--- R/likelihoods.R | 19 ++++++++++--------- tests_current/some tests.R | 2 +- 3 files changed, 14 insertions(+), 13 deletions(-) diff --git a/R/fbms.R b/R/fbms.R index 03e2998..47f774d 100644 --- a/R/fbms.R +++ b/R/fbms.R @@ -210,11 +210,11 @@ gen.mlpost.params.glm <- function (beta_prior, user_params, p, n) { gen.mlpost.params.lm <- function (beta_prior, user_params, p, n) { if (beta_prior == "Jeffreys-BIC") { - if(!is.null(user_params$var)) + if(length(user_params$var)==0) { - var <- "unknown" + user_params$var <- "unknown" } - return(var) + return(list(var = user_params$var)) }else if (beta_prior == "beta.prime") { return(list("beta.prime")) } else if (beta_prior == "CH") { diff --git a/R/likelihoods.R b/R/likelihoods.R index 16a5691..388b696 100644 --- a/R/likelihoods.R +++ b/R/likelihoods.R @@ -121,7 +121,7 @@ lm.logpost.bas <- function (y, x, model, complex, params = list(r = exp(-0.5), b probinit, as.integer(rep(0, ifelse(p == 0,2,1))), incint = as.integer(F), - alpha = ifelse(length(params$beta_prior$hyper.parameters$alpha) > 0, as.numeric(params$beta_prior$hyper.parameters$alpha),NULL), + alpha = ifelse(length(params$beta_prior$a) > 0, as.numeric(params$beta_prior$a),NULL), method = as.integer(params$beta_prior$method), modelprior = uniform(), Rpivot = TRUE, @@ -133,7 +133,7 @@ lm.logpost.bas <- function (y, x, model, complex, params = list(r = exp(-0.5), b # You can also print a message or log the error if needed cat("An error occurred:", conditionMessage(e), "\n") }) - + #browser() if (length(mod) == 0) { return(list(crit = -.Machine$double.xmax + log(params$r * sum(complex$oc)), coefs = rep(0, p + 1))) } @@ -234,14 +234,14 @@ gaussian.loglik <- function (y, x, model, complex, params) { params <- list() if (length(params$r) == 0) params$r <- 1/dim(x)[1] - if (length(params$var) == 0) - params$var <- 1 + if (length(params$beta_prior$var) == 0) + params$beta_prior$var <- 1 suppressWarnings({mod <- fastglm(as.matrix(x[, model]), y, family = gaussian())}) - if (params$var == "unknown") + if (params$beta_prior$var == "unknown") ret <- (-(mod$aic + (log(length(y))-2) * (mod$rank) - 2 * log(params$r) * (sum(complex$oc)))) / 2 else - ret <- (-(mod$deviance/params$var + log(length(y)) * (mod$rank - 1) - 2 * log_prior(params, complex))) / 2 + ret <- (-(mod$deviance/params$beta_prior$var + log(length(y)) * (mod$rank - 1) - 2 * log_prior(params, complex))) / 2 return(list(crit = ret, coefs = mod$coefficients)) } @@ -832,12 +832,13 @@ fbms.mlik.master2 <- function(y, x, model, complex, params = list(family = "gaus if(params$family == "gaussian") params_use$beta_prior <- gen.mlpost.params.lm(params$beta_prior$type, params$beta_prior, p+1, n) else + { params_use$beta_prior <- gen.mlpost.params.glm(params$beta_prior$type, params$beta_prior, p+1, n) - params_use$family <- params$family - + params_use$family <- params$family + } loglik.pi <- select.mlpost.fun(params$beta_prior$type, params$family) - #browser() + browser() result <- loglik.pi(y,x,model,complex,params_use) diff --git a/tests_current/some tests.R b/tests_current/some tests.R index eecdf78..04e340c 100644 --- a/tests_current/some tests.R +++ b/tests_current/some tests.R @@ -38,7 +38,7 @@ gaussian_only_priors <- c("ZS-null", "ZS-full", "hyper-g-laplace", "AIC", "JZS", glm_priors <- glm_and_gaussian_priors gaussian_priors <- c(glm_and_gaussian_priors, gaussian_only_priors) -families <- c("gaussian", "binomial", "poisson", "gamma")[2:4] +families <- c("gaussian", "binomial", "poisson", "gamma")[1] # Required parameters for priors prior_params <- list( -- GitLab From 7677cd5b14b1a4e65ef8390e3e2b899e14bd78c4 Mon Sep 17 00:00:00 2001 From: aliaksah Date: Fri, 4 Apr 2025 17:33:07 +0200 Subject: [PATCH 12/36] fixed all gaussian priors --- R/fbms.R | 20 ++++++++++---------- R/likelihoods.R | 3 +-- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/R/fbms.R b/R/fbms.R index 47f774d..b477a6f 100644 --- a/R/fbms.R +++ b/R/fbms.R @@ -216,10 +216,10 @@ gen.mlpost.params.lm <- function (beta_prior, user_params, p, n) { } return(list(var = user_params$var)) }else if (beta_prior == "beta.prime") { - return(list("beta.prime")) + return(list(type = "beta.prime")) } else if (beta_prior == "CH") { check_required_params(c("a", "b", "s"), user_params, beta_prior) - user_params <- list( + user_params <- list(type = "CH", a = user_params$a, b = user_params$b, @@ -229,7 +229,7 @@ gen.mlpost.params.lm <- function (beta_prior, user_params, p, n) { } else if (beta_prior == "tCCH") { check_required_params(c("a", "b", "s", "rho", "v", "k"), user_params, beta_prior) user_params <- list( - "tCCH", + type = "tCCH", a = user_params$a, b = user_params$b, s = user_params$s, @@ -239,25 +239,25 @@ gen.mlpost.params.lm <- function (beta_prior, user_params, p, n) { ) return(user_params) } else if (beta_prior == "intrinsic") { - return(list("intrinsic")) + return(list(type = "intrinsic")) } else if (beta_prior == "TG") { check_required_params(c("a", "s"), user_params, beta_prior) user_params <- list( - "TG", + type = "TG", a = user_params$a, s = user_params$s ) return(user_params) } else if (beta_prior == "Jeffreys") { - return(list("Jeffreys")) + return(list(type = "Jeffreys")) } else if (beta_prior == "ZS-adapted") { - return(list("ZS-adapted")) + return(list(type = "ZS-adapted")) } else if (beta_prior == "benchmark") { - return(list("benchmark")) + return(list(type = "benchmark")) } else if (beta_prior == "robust") { - return(list("robust")) + return(list(type = "robust")) } else if (beta_prior == "uniform") { - return(list("uniform")) + return(list(type = "uniform")) } else{ if(!is.null(user_params$a)) { diff --git a/R/likelihoods.R b/R/likelihoods.R index 388b696..8658da2 100644 --- a/R/likelihoods.R +++ b/R/likelihoods.R @@ -412,7 +412,7 @@ gaussian_tcch_log_likelihood <- function(y, x, model, complex, params = list(r = } else if (params$beta_prior$type == "tCCH") { a <- hyper$a b <- hyper$b - r <- hyper$r + r <- hyper$rho s <- hyper$s v <- hyper$v k <- hyper$k @@ -838,7 +838,6 @@ fbms.mlik.master2 <- function(y, x, model, complex, params = list(family = "gaus } loglik.pi <- select.mlpost.fun(params$beta_prior$type, params$family) - browser() result <- loglik.pi(y,x,model,complex,params_use) -- GitLab From 2ad9512d1eb50a33ffc9c328b30ead60f63ace1f Mon Sep 17 00:00:00 2001 From: aliaksah Date: Fri, 4 Apr 2025 17:39:48 +0200 Subject: [PATCH 13/36] again fixed last thing for tcch under gaussian --- R/fbms.R | 4 +++- R/likelihoods.R | 2 +- tests_current/some tests.R | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/R/fbms.R b/R/fbms.R index b477a6f..ad6ae1b 100644 --- a/R/fbms.R +++ b/R/fbms.R @@ -51,8 +51,10 @@ fbms <- function ( if(family == "gaussian") mlpost_params$beta_prior <- gen.mlpost.params.lm(beta_prior$type, beta_prior, ncol(data) - 1, nrow(data)) else + { mlpost_params$beta_prior <- gen.mlpost.params.glm(beta_prior$type, beta_prior, ncol(data) - 1, nrow(data)) - mlpost_params$beta_prior$type <- beta_prior$type + mlpost_params$beta_prior$type <- beta_prior$type + } } else { if (family == "gaussian") loglik.pi <- gaussian.loglik diff --git a/R/likelihoods.R b/R/likelihoods.R index 8658da2..7c4bcc3 100644 --- a/R/likelihoods.R +++ b/R/likelihoods.R @@ -330,7 +330,7 @@ gaussian_tcch_log_likelihood <- function(y, x, model, complex, params = list(r = n <- length(y) # Switch-like structure to assign hyperparameters based on prior - hyper <- params$beta_prior$hyper.parameters + hyper <- params$beta_prior if (params$beta_prior$type == "CH") { # CH prior: b and s should be user-specified, with defaults if not provided a <- ifelse(!is.null(hyper$a), hyper$a, 1) # Default to 1 if not specified diff --git a/tests_current/some tests.R b/tests_current/some tests.R index 04e340c..dd95321 100644 --- a/tests_current/some tests.R +++ b/tests_current/some tests.R @@ -38,7 +38,7 @@ gaussian_only_priors <- c("ZS-null", "ZS-full", "hyper-g-laplace", "AIC", "JZS", glm_priors <- glm_and_gaussian_priors gaussian_priors <- c(glm_and_gaussian_priors, gaussian_only_priors) -families <- c("gaussian", "binomial", "poisson", "gamma")[1] +families <- c("gaussian", "binomial", "poisson", "gamma") # Required parameters for priors prior_params <- list( -- GitLab From 205c14d9147c8428c330237c762bedc11813f75f Mon Sep 17 00:00:00 2001 From: aliaksah Date: Sat, 5 Apr 2025 14:16:08 +0200 Subject: [PATCH 14/36] pused --- R/fbms.R | 2 +- R/likelihoods.R | 1 + tests_current/some tests.R | 4 ++-- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/R/fbms.R b/R/fbms.R index ad6ae1b..99cf477 100644 --- a/R/fbms.R +++ b/R/fbms.R @@ -265,7 +265,7 @@ gen.mlpost.params.lm <- function (beta_prior, user_params, p, n) { { alpha <- user_params$a } else - alpha <- -1 + alpha <- -1#check how BAS uses the default if (beta_prior == "g-prior") { return(list(method = 0, alpha = alpha)) } else if (beta_prior == "hyper-g") { diff --git a/R/likelihoods.R b/R/likelihoods.R index 7c4bcc3..802c3b2 100644 --- a/R/likelihoods.R +++ b/R/likelihoods.R @@ -836,6 +836,7 @@ fbms.mlik.master2 <- function(y, x, model, complex, params = list(family = "gaus params_use$beta_prior <- gen.mlpost.params.glm(params$beta_prior$type, params$beta_prior, p+1, n) params_use$family <- params$family } + loglik.pi <- select.mlpost.fun(params$beta_prior$type, params$family) result <- loglik.pi(y,x,model,complex,params_use) diff --git a/tests_current/some tests.R b/tests_current/some tests.R index dd95321..6e9a5df 100644 --- a/tests_current/some tests.R +++ b/tests_current/some tests.R @@ -46,7 +46,7 @@ prior_params <- list( "hyper-g" = list(a = 3), "hyper-g-n" = list(a = 3), "ZS-null" = list(a = 3), - "ZS-full" = list(a = 3), + "ZS-full" = list(a = 500), "hyper-g-laplace" = list(a = 3), "AIC" = list(a = 3), "JZS" = list(a = 3), @@ -85,7 +85,7 @@ for (family in families) { crit_rounded <- round(result$crit - result.null$crit, 8) - coefs_mean <- round(mean(result$coefs - result.null$coefs), 8) + coefs_mean <- round(mean(result$coefs) - mean(result.null$coefs), 8) cat(sprintf("\nPrior: %-15s -> crit: %8.4f, mean(coefs): %8.4f", prior, crit_rounded, coefs_mean)) -- GitLab From 8bfbcebc9d99be0e0ff11789c6684e204767f739 Mon Sep 17 00:00:00 2001 From: jonlachmann Date: Sat, 5 Apr 2025 14:16:30 +0200 Subject: [PATCH 15/36] * Temporary likelihoods2 file to compare against. --- R/likelihoods2.R | 794 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 794 insertions(+) create mode 100644 R/likelihoods2.R diff --git a/R/likelihoods2.R b/R/likelihoods2.R new file mode 100644 index 0000000..ea8697a --- /dev/null +++ b/R/likelihoods2.R @@ -0,0 +1,794 @@ +# Title : Log likelihood functions +# Objective : Log likelihood functions with priors to be used as templates or directly in GMJMCMC +# Created by: jonlachmann +# Created on: 2021-02-24 + +#' Log likelihood function for glm regression with parameter priors from BAS package +#' This function is created as an example of how to create an estimator that is used +#' to calculate the marginal likelihood of a model. +#' +#' @param y A vector containing the dependent variable +#' @param x The matrix containing the precalculated features +#' @param model The model to estimate as a logical vector +#' @param complex A list of complexity measures for the features +#' @param params A list of parameters for the log likelihood, supplied by the user, important to specify the tuning parameters of beta priors and family that BAS uses in glm models +#' +#' @return A list with the log marginal likelihood combined with the log prior (crit) and the posterior mode of the coefficients (coefs). +#' +#' @examples +#' glm.logpost.bas(as.integer(rnorm(100) > 0),cbind(1,matrix(rnorm(100))),c(TRUE,TRUE),list(oc = 1)) +#' +#' @importFrom BAS uniform Jeffreys g.prior +#' @importFrom stats poisson Gamma glm.control +#' @export glm.logpost.bas +glm.logpost.bas2 <- function (y, x, model, complex, params = list(r = exp(-0.5), family = "binomial", prior_beta = Jeffreys(), laplace = FALSE)) { + if (length(params) == 0) + params <- list(r = 1/dim(x)[1], family = "binomial", prior_beta = g.prior(max(dim(x)[1],sum(model)-1)), laplace = FALSE) + p <- sum(model) - 1 + if(p==0) + { + probinit <- as.numeric(c(1,0.99)) + model[2] <- T + }else{ + probinit <- as.numeric(c(1,rep(0.99,p))) + } + + mod<-NULL + + tryCatch({ + if(params$family == "binomial") + suppressWarnings({ + mod <- .Call(BAS:::C_glm_deterministic, + y = as.numeric(y), X = as.matrix(x[,model]), + Roffset = as.numeric(rep(0, length(y))), + Rweights = as.numeric(rep(1, length(y))), + Rprobinit = probinit, + Rmodeldim = as.integer(rep(0,ifelse(p==0,2,1))), + modelprior = uniform(), + betaprior = params$prior_beta, + family = binomial(), + Rcontrol = glm.control(), + Rlaplace = as.integer(params$laplace)) + }) + else if(params$family == "poisson") + suppressWarnings({ + mod <- .Call(BAS:::C_glm_deterministic, + y = as.numeric(y), X = as.matrix(x[,model]), + Roffset = as.numeric(rep(0, length(y))), + Rweights = as.numeric(rep(1, length(y))), + Rprobinit = probinit, + Rmodeldim = as.integer(rep(0,ifelse(p==0,2,1))), + modelprior = uniform(), + betaprior = params$prior_beta, + family = poisson(), + Rcontrol = glm.control(), + Rlaplace = as.integer(params$laplace)) + }) + else{ + suppressWarnings({ + mod <- .Call(BAS:::C_glm_deterministic, + y = as.numeric(y), X = as.matrix(x[,model]), + Roffset = as.numeric(rep(0, length(y))), + Rweights = as.numeric(rep(1, length(y))), + Rprobinit = probinit, + Rmodeldim = as.integer(rep(0,ifelse(p==0,2,1))), + modelprior = uniform(), + betaprior = params$prior_beta, + family = Gamma(), + Rcontrol = glm.control(), + Rlaplace = as.integer(params$laplace)) + })} + }, error = function(e) { + # Handle the error by setting result to NULL + mod <- NULL + # You can also print a message or log the error if needed + cat("An error occurred:", conditionMessage(e), "\n") + }) + + if(length(mod)==0) { + return(list(crit = -.Machine$double.xmax + log(params$r * sum(complex$oc)),coefs = rep(0,p+1))) + } + + if(p == 0) + { + ret <- mod$logmarg[2] + log(params$r) * sum(complex$oc) + return(list(crit=ret, coefs=mod$mle[[2]])) + } + ret <- mod$logmarg + log(params$r) * sum(complex$oc) + return(list(crit=ret, coefs=mod$mle[[1]])) +} + + +#' Log likelihood function for Gaussian regression with parameter priors from BAS package +#' This function is created as an example of how to create an estimator that is used +#' to calculate the marginal likelihood of a model. +#' +#' @param y A vector containing the dependent variable +#' @param x The matrix containing the precalculated features +#' @param model The model to estimate as a logical vector +#' @param complex A list of complexity measures for the features +#' @param params A list of parameters for the log likelihood, supplied by the user, important to specify the tuning parameters of beta priors where the corresponding integers as prior_beta must be provided "g-prior" = 0, "hyper-g" = 1, "EB-local" = 2, "BIC" = 3, "ZS-null" = 4, "ZS-full" = 5, "hyper-g-laplace" = 6, "AIC" = 7, "EB-global" = 2, "hyper-g-n" = 8, "JZS" = 9 and in Gaussian models +#' +#' @return A list with the log marginal likelihood combined with the log prior (crit) and the posterior mode of the coefficients (coefs). +#' +#' @examples +#' lm.logpost.bas(rnorm(100), cbind(1,matrix(rnorm(100))), c(TRUE,TRUE), list(oc = 1)) +#' +#' +#' @export lm.logpost.bas +lm.logpost.bas2 <- function (y, x, model, complex, params = list(r = exp(-0.5),prior_beta = "g-prior",alpha = 4)) { + if (length(params) == 0) + params <- list(r = 1/dim(x)[1], prior_beta = 0,alpha = max(dim(x)[1],sum(model)^2)) + + + p <- sum(model) - 1 + if(p==0) + { + probinit <- as.numeric(c(1,0.99)) + model[2] <- T + }else{ + probinit <- as.numeric(c(1,rep(0.99,p))) + } + + mod<-NULL + + tryCatch({ + suppressWarnings({ + mod <- .Call(BAS:::C_deterministic, + y = y, X = as.matrix(x[,model]), + as.numeric(rep(1, length(y))), + probinit, + as.integer(rep(0,ifelse(p==0,2,1))), + incint = as.integer(F), + alpha = ifelse(length(params$alpha)>0,as.numeric(params$alpha),NULL), + method = as.integer(params$prior_beta), + modelprior = uniform(), + Rpivot = TRUE, + Rtol = 1e-7) + }) + }, error = function(e) { + # Handle the error by setting result to NULL + mod <- NULL + # You can also print a message or log the error if needed + cat("An error occurred:", conditionMessage(e), "\n") + }) + + if(length(mod)==0) { + return(list(crit = -.Machine$double.xmax + log(params$r * sum(complex$oc)),coefs = rep(0,p+1))) + } + + if(p == 0) + { + ret <- mod$logmarg[2] + log(params$r) * sum(complex$oc) + return(list(crit=ret, coefs=mod$mle[[2]])) + } + ret <- mod$logmarg + log(params$r) * sum(complex$oc) + return(list(crit=ret, coefs=mod$mle[[1]])) +} + + +#' Log likelihood function for logistic regression with a Jeffreys parameter prior and BIC approximations of the posterior +#' This function is created as an example of how to create an estimator that is used +#' to calculate the marginal likelihood of a model. +#' +#' @param y A vector containing the dependent variable +#' @param x The matrix containing the precalculated features +#' @param model The model to estimate as a logical vector +#' @param complex A list of complexity measures for the features +#' @param params A list of parameters for the log likelihood, supplied by the user +#' +#' @return A list with the log marginal likelihood combined with the log prior (crit) and the posterior mode of the coefficients (coefs). +#' +#' @examples +#' logistic.loglik2(as.integer(rnorm(100) > 0), matrix(rnorm(100)), TRUE, list(oc = 1)) +#' +#' +#' @export logistic.loglik2 +logistic.loglik2 <- function (y, x, model, complex, params = list(r = exp(-0.5))) { + if (length(params) == 0) + params <- list(r = 1/dim(x)[1]) + suppressWarnings({mod <- fastglm(as.matrix(x[, model]), y, family = binomial())}) + ret <- (-(mod$deviance + log(length(y)) * (mod$rank - 1) - 2 * log(params$r) * sum(complex$oc))) / 2 + return(list(crit=ret, coefs=mod$coefficients)) +} + +#' Log likelihood function for glm regression with a Jeffreys parameter prior and BIC approximations of the posterior +#' This function is created as an example of how to create an estimator that is used +#' to calculate the marginal likelihood of a model. +#' +#' @param y A vector containing the dependent variable +#' @param x The matrix containing the precalculated features +#' @param model The model to estimate as a logical vector +#' @param complex A list of complexity measures for the features +#' @param params A list of parameters for the log likelihood, supplied by the user, family must be specified +#' +#' @return A list with the log marginal likelihood combined with the log prior (crit) and the posterior mode of the coefficients (coefs). +#' +#' @examples +#' glm.loglik(abs(rnorm(100))+1, matrix(rnorm(100)), TRUE, list(oc = 1)) +#' +#' +#' @export glm.loglik +glm.loglik2 <- function (y, x, model, complex, params = list(r = exp(-0.5),family = "Gamma")) { + if (length(params) == 0) + params <- list(r = 1/dim(x)[1]) + + if(params$family == "binomial") + { + fam = binomial() + }else if(params$family == "poisson"){ + fam = poisson() + }else + { + fam = Gamma() + } + + suppressWarnings({mod <- fastglm(as.matrix(x[, model]), y, family = fam)}) + ret <- (-(mod$deviance + log(length(y)) * (mod$rank - 1) - 2 * log(params$r) * sum(complex$oc))) / 2 + return(list(crit=ret, coefs=mod$coefficients)) +} + + +#' Log likelihood function for gaussian regression with a Jeffreys prior and BIC approximation of MLIK with both known and unknown variance of the responses +#' +#' @param y A vector containing the dependent variable +#' @param x The matrix containing the precalculated features +#' @param model The model to estimate as a logical vector +#' @param complex A list of complexity measures for the features +#' @param params A list of parameters for the log likelihood, supplied by the user +#' +#' @return A list with the log marginal likelihood combined with the log prior (crit) and the posterior mode of the coefficients (coefs). +#' +#' @examples +#' gaussian.loglik(rnorm(100), matrix(rnorm(100)), TRUE, list(oc = 1), NULL) +#' +#' +#' @export gaussian.loglik +gaussian.loglik2 <- function (y, x, model, complex, params) { + if(length(params)==0) + params <- list() + if (length(params$r) == 0) + params$r <- 1/dim(x)[1] + if(length(params$var) == 0) + params$var <- 1 + suppressWarnings({mod <- fastglm(as.matrix(x[, model]), y, family = gaussian())}) + + if(params$var == "unknown") + ret <- (-(mod$aic + (log(length(y))-2) * (mod$rank) - 2 * log(params$r) * (sum(complex$oc)))) / 2 + else + ret <- (-(mod$deviance/params$var + log(length(y)) * (mod$rank - 1) - 2 * log_prior(params, complex))) / 2 + + return(list(crit=ret, coefs=mod$coefficients)) +} + + +#' Log likelihood function for linear regression using Zellners g-prior +#' +#' @param y A vector containing the dependent variable +#' @param x The matrix containing the precalculated features +#' @param model The model to estimate as a logical vector +#' @param complex A list of complexity measures for the features +#' @param params A list of parameters for the log likelihood, supplied by the user +#' +#' @return A list with the log marginal likelihood combined with the log prior (crit) and the posterior mode of the coefficients (coefs). +#' +#' @examples +#' gaussian.loglik2.g(rnorm(100), matrix(rnorm(100)), TRUE, list(oc=1)) +#' +#' @export gaussian.loglik2.g +gaussian.loglik2.g <- function (y, x, model, complex, params = NULL) +{ + if(length(params)==0) + params <- list() + if (length(params$r) == 0) + params$r <- 1/dim(x)[1] + suppressWarnings({ + mod <- fastglm(as.matrix(x[, model]), y, family = gaussian()) + }) + # Calculate R-squared + y_mean <- mean(y) + TSS <- sum((y - y_mean)^2) + RSS <- sum(mod$residuals^2) + Rsquare <- 1 - (RSS / TSS) + + if (length(params$r) == 0 || length(params$g) == 0) + { + params$r <- 1/dim(x)[1] + params$g <- max(mod$rank^2,length(y)) + } + + # logarithm of marginal likelihood + mloglik <- 0.5*(log(1.0 + params$g) * (dim(x)[1] - mod$rank) - log(1.0 + params$g * (1.0 - Rsquare)) * (dim(x)[1] - 1))*(mod$rank!=1) + + # logarithm of model prior + # default value or parameter r + lp <- log_prior(params, complex) + + return(list(crit = mloglik + lp, coefs = mod$coefficients)) +} + + +#' Log likelihood function for Gaussian regression with parameter priors from BAS package +#' +#' This function computes the marginal likelihood of a Gaussian regression model under different priors. +#' +#' @param y A numeric vector containing the dependent variable. +#' @param x A matrix containing the independent variables, including an intercept column. +#' @param model A logical vector indicating which variables to include in the model. +#' @param complex A list containing complexity measures for the features. +#' @param params A list of parameters for the log likelihood, specifying the tuning parameters of beta priors. +#' +#' @return A list with elements: +#' \item{crit}{Log marginal likelihood combined with the log prior.} +#' \item{coefs}{Posterior mode of the coefficients.} +#' +#' @examples +#' gaussian_tcch_log_likelihood(rnorm(100), matrix(rnorm(100)), TRUE, list(oc=1)) +#' +#' @importFrom BAS phi1 hypergeometric1F1 hypergeometric2F1 +#' @importFrom tolerance F1 +#' @export +gaussian_tcch_log_likelihood2 <- function(y, x, model, complex, params = list(r = exp(-0.5), prior_beta = "intrinsic")) { + + # Fit the linear model using fastglm + fitted_model <- fastglm(as.matrix(x[, model]), y, family = gaussian()) + log_likelihood <- -(fitted_model$aic -2 * (fitted_model$rank))/2 + # Compute R-squared manually + y_mean <- mean(y) + TSS <- sum((y - y_mean)^2) + RSS <- sum(fitted_model$residuals^2) + R2_M <- 1 - (RSS / TSS) + + p_M <- fitted_model$rank + n <- length(y) + + # Switch-like structure to assign hyperparameters based on prior + if (params$prior_beta[[1]] == "CH") { + # CH prior: b and s should be user-specified, with defaults if not provided + a <- ifelse(!is.null(params$prior_beta$a),params$prior_beta$a, 1) # Default to 1 if not specified + b <- ifelse(!is.null(params$prior_beta$b),params$prior_beta$b, 2) # Default to 1 if not specified + r <- 0 + s <- ifelse(!is.null(params$prior_beta$s), params$prior_beta$s, 1) # Default to 1 if not specified + v <- 1 + k <- 1 + + } else if (params$prior_beta[[1]] == "hyper-g") { + a <- 1 + b <- 2 + r <- 0 + s <- 0 + v <- 1 + k <- 1 + + } else if (params$prior_beta[[1]] == "uniform") { + a <- 2 + b <- 2 + r <- 0 + s <- 0 + v <- 1 + k <- 1 + + } else if (params$prior_beta[[1]] == "Jeffreys") { + a <- 0.0001 + b <- 2 + r <- 0 + s <- 0 + v <- 1 + k <- 1 + } else if (params$prior_beta[[1]] == "beta.prime") { + a <- 1/2 + b <- n - p_M - 1.5 + r <- 0 + s <- 0 + v <- 1 + k <- 1 + + } else if (params$prior_beta[[1]] == "benchmark") { + a <- 0.02 + b <- 0.02 * max(n, p_M^2) + r <- 0 + s <- 0 + v <- 1 + k <- 1 + + } else if (params$prior_beta[[1]] == "TG") { + + a <- 2 * ifelse(!is.null(params$prior_beta$a),params$prior_beta$a, 1) + b <- 2 + r <- 0 + s <- 2 * ifelse(!is.null(params$prior_beta$s),params$prior_beta$s, 1) + v <- 1 + k <- 1 + + } else if (params$prior_beta[[1]] == "ZS-adapted") { + a <- 1 + b <- 2 + r <- 0 + s <- n + 3 + v <- 1 + k <- 1 + } else if (params$prior_beta[[1]] == "robust") { + a <- 1 + b <- 2 + r <- 1.5 + s <- 0 + v <- (n + 1) / (p_M + 1) + k <- 1 + + } else if (params$prior_beta[[1]] == "hyper-g-n") { + a <- 1 + b <- 2 + r <- 1.5 + s <- 0 + v <- 1 + k <- 1 + + } else if (params$prior_beta[[1]] == "intrinsic") { + a <- 1 + b <- 1 + r <- 1 + s <- 0 + v <- (n + p_M + 1) / (p_M + 1) + k <- (n + p_M + 1) / n + + }else if (params$prior_beta[[1]] == "tCCH") { + a <- params$prior_beta$a + b <- params$prior_beta$b + r <- params$prior_beta$rho + s <- params$prior_beta$s + v <- params$prior_beta$v + k <- params$prior_beta$k + }else { + stop("Unknown prior name: ", params$prior_beta) + } + + # + if (!is.null(r) & r == 0) { + + term1 <- lbeta((a + p_M) / 2, b / 2) + term2 <- phi1(b / 2, (n - 1) / 2, (a + b + p_M) / 2, s / (2 * v), min(0.8,R2_M/(v - (v - 1) * R2_M),log = T)) + + if(R2_M/(v - (v - 1) * R2_M)>0.8) + { + warning("Infinite marginal log likelihood! phi1 last argument reduced to 0.8. Use a different prior_beta (Robust, Hyper-g/n, Intrinsic, or g-prior)") + } + + term3 <- lbeta(a / 2, b / 2) + term4 <- hypergeometric1F1(b / 2, (a + b) / 2, s / (2 * v),log = T) + marginal_likelihood <- log_likelihood + (term1) + (term2) - (p_M / 2) * log(v) - ((n - 1) / 2)*log(1 - (1 - 1 / v) * R2_M) - (term3) - (term4) + } else if (!is.null(s) & s == 0) { + term1 <- lbeta((a + p_M) / 2, b / 2) + term2 <- hypergeometric2F1(r, b / 2, (a + b) / 2, 1 - k,log = T) + term3 <- F1((a + p_M) / 2, (a + b + p_M + 1 - n - 2 * r) / 2, (n - 1) / 2, (a + b + p_M) / 2, 1 - k, 1 - k - (R2_M^2 * k) / ((1 - R2_M) * v)) + marginal_likelihood <- log_likelihood + (a+p_M-2*r)/2*log(k) + (term1) - (term2) - (term3) - (p_M / 2) * log(v) - log(1 - R2_M) * ((n - 1) / 2) - lbeta(a / 2, b / 2) + + } else { + stop("Invalid inputs: either r = 0 or s = 0 must be specified.") + } + + if (length(params$r) == 0) params$r <- 1/dim(x)[1] # default value or parameter r + + lp <- log_prior(params, complex) + + return(list(crit = marginal_likelihood + lp, coefs = fitted_model$coefficients)) +} + + + +#' Log likelihood function for logistic regression with an approximate Laplace approximations used +#' This function is created as an example of how to create an estimator that is used +#' to calculate the marginal likelihood of a model. +#' +#' @param y A vector containing the dependent variable +#' @param x The matrix containing the precalculated features +#' @param model The model to estimate as a logical vector +#' @param complex A list of complexity measures for the features +#' @param params A list of parameters for the log likelihood, supplied by the user +#' +#' @return A list with the log marginal likelihood combined with the log prior (crit) and the posterior mode of the coefficients (coefs). +#' +#' @examples +#' logistic.loglik2.ala(as.integer(rnorm(100) > 0), matrix(rnorm(100)), TRUE, list(oc = 1)) +#' +#' +#' @export logistic.loglik2.ala +logistic.loglik2.ala <- function (y, x, model, complex, params = list(r = exp(-0.5))) { + if (length(params) == 0) + params <- list(r = 1/dim(x)[1]) + suppressWarnings({mod <- fastglm(as.matrix(x[, model]), y, family = binomial(),maxit = 1)}) + ret <- (-(mod$deviance + log(length(y)) * (mod$rank - 1) -2 * log(params$r) * sum(complex$oc))) / 2 + return(list(crit=ret, coefs=mod$coefficients)) +} + + + +#' Log likelihood function for logistic regression for alpha calculation +#' This function is just the bare likelihood function +#' +#' @param a A vector of the alphas to be used +#' @param data The data to be used for calculation +#' @param mu_func The function linking the mean to the covariates, +#' as a string with the alphas as a\[i\]. +#' +#' @return A numeric with the log likelihood. +#' +#' @export logistic.loglik2.alpha +logistic.loglik2.alpha <- function (a, data, mu_func) { + m <- 1 / (1 + exp(-eval(parse(text = mu_func)))) + -sum((data[,1] * log(m) + (1 - data[, 1]) * log(1 - m))) +} + + +#' Log likelihood function for gaussian regression for alpha calculation +#' This function is just the bare likelihood function +#' Note that it only gives a proportional value and is equivalent to least squares +#' +#' @param a A vector of the alphas to be used +#' @param data The data to be used for calculation +#' @param mu_func The function linking the mean to the covariates, +#' as a string with the alphas as a\[i\]. +#' +#' @return A numeric with the log likelihood. +#' @examples +#'\dontrun{ +#'gaussian.loglik2.alpha(my_alpha,my_data,my_mu) +#'} +#' @export gaussian.loglik2.alpha +gaussian.loglik2.alpha <- function (a, data, mu_func) { + m <- eval(parse(text=mu_func)) + sum((data[,1]-m)^2) +} + + +#' Log model prior function +#' @param params list of passed parameters of the likelihood in GMJMCMC +#' @param complex list of complexity measures of the features included into the model +#' +#' @return A numeric with the log model prior. +#' +#' @examples +#' log_prior(params = list(r=2), complex = list(oc = 2)) +#' +#' @export log_prior +log_prior <- function (params, complex) { + pl <- log(params$r) * (sum(complex$oc)) + return(pl) +} + + +#' Master Log Marginal Likelihood Function +#' +#' This function serves as a unified interface to compute the log marginal likelihood +#' for different regression models and priors by calling specific log likelihood functions. +#' +#' @param y A numeric vector containing the dependent variable. +#' @param x A matrix containing the precalculated features (independent variables). +#' @param model A logical vector indicating which variables to include in the model. +#' @param complex A list of complexity measures for the features. +#' @param params A list of parameters controlling the model family, prior, and tuning parameters. +#' Key elements include: +#' - family: "binomial", "poisson", "gamma" (all three referred to as GLM below), or "gaussian" (default: "gaussian") +#' - prior_beta: Type of prior as a string (default: "g-prior"). Possible values include: +#' - "beta.prime": Beta-prime prior (GLM/Gaussian, no additional args) +#' - "CH": Compound Hypergeometric prior (GLM/Gaussian, requires `a`, `b`, optionally `s`) +#' - "EB-local": Empirical Bayes local prior (GLM/Gaussian, requires `a` for Gaussian) +#' - "EB-global": Empirical Bayes local prior (Gaussian, requires `a` for Gaussian) +#' - "g-prior": Zellner's g-prior (GLM/Gaussian, requires `g`) +#' - "hyper-g": Hyper-g prior (GLM/Gaussian, requires `a`) +#' - "hyper-g-n": Hyper-g/n prior (GLM/Gaussian, requires `a`) +#' - "tCCH": Truncated Compound Hypergeometric prior (GLM/Gaussian, requires `a`, `b`, `s`, `rho`, `v`, `k`) +#' - "intrinsic": Intrinsic prior (GLM/Gaussian, no additional args) +#' - "TG": Truncated Gamma prior (GLM/Gamma, requires `a`, `s`) +#' - "Jeffreys": Jeffreys prior (GLM/Gaussian, no additional args) +#' - "uniform": Uniform prior (GLM/Gaussian, no additional args) +#' - "benchmark": Benchmark prior (Gaussian/GLM, no additional args) +#' - "ZS-adapted": Zellner-Siow adapted prior (Gaussian TCCH, no additional args) +#' - "robust": Robust prior (Gaussian/GLM, no additional args) +#' - "Jeffreys-BIC": Jeffreys prior with BIC approximation of marginal likelihood (Gaussian/GLM) +#' - "ZS-null": Zellner-Siow null prior (Gaussian, requires `a`) +#' - "ZS-full": Zellner-Siow full prior (Gaussian, requires `a`) +#' - "hyper-g-laplace": Hyper-g Laplace prior (Gaussian, requires `a`) +#' - "AIC": AIC prior from BAS (Gaussian, requires penalty `a`) +#' - "BIC": BIC prior from BAS (Gaussian/GLM) +#' - "JZS": Jeffreys-Zellner-Siow prior (Gaussian, requires `a`) +#' - r: Model complexity penalty (default: 1/n) +#' - g: Tuning parameter for g-prior (default: max(n, p^2)) +#' - a, b, s, v, rho, k: Hyperparameters for various priors +#' - n: Sample size for some priors (default: length(y)) +#' - var: Variance assumption for Gaussian models ("known" or "unknown", default: "unknown") +#' - laplace: Logical for Laplace approximation in GLM only (default: FALSE) +#' +#' @return A list with elements: +#' \item{crit}{Log marginal likelihood combined with the log prior.} +#' \item{coefs}{Posterior mode of the coefficients.} +#' +#' @examples +#' fbms.mlik.master(rnorm(100), matrix(rnorm(100)), TRUE, list(oc = 1), list(family = "gaussian", prior_beta = "g-prior")) +#' +#' @importFrom BAS beta.prime bic.prior CCH EB.local g.prior hyper.g hyper.g.n tCCH intrinsic TG Jeffreys uniform +#' @export +fbms.mlik.master_old <- function(y, x, model, complex, params = list(family = "gaussian", prior_beta = "g-prior", r = exp(-0.5))) { + # Extract dimensions + n <- length(y) + p <- sum(model) - 1 # Number of predictors excluding intercept + + # Set default parameters if not fully specified + if (is.null(params$family)) params$family <- "gaussian" + if (is.null(params$prior_beta)) params$prior_beta <- "g-prior" + if (is.null(params$g)) params$g <- max(p^2, n) + if (is.null(params$n)) params$n <- n + if (is.null(params$r)) params$r <- 1/n + + # Ensure complex has oc if not provided, ignore by default + if (is.null(complex$oc)) complex$oc <- 0 + + # Homogenize and prepare params for nested calls + params_master <- params + params_nested <- list(r = params_master$r) + + # Define valid priors for each family + #glm_only_priors <- c("CCH", "tCCH", "TG") + glm_and_gaussian_priors <- c("CH", "tCCH", "TG","beta.prime", "EB-local", "g-prior", "hyper-g", "hyper-g-n", + "intrinsic", "ZS-adapted", "Jeffreys", "uniform", "benchmark", "robust", "Jeffreys-BIC") + gaussian_only_priors <- c("ZS-null", "ZS-full", "hyper-g-laplace","BIC", "AIC", "JZS","EB-global") + + #review a bit + gaussian_not_robust <- c("CH", "tCCH", "ZS-adapted", "TG","beta.prime", "benchmark","Jeffreys") + gaussian_robust <- c("g-prior", "hyper-g", "EB-local","BIC", "Jeffreys-BIC", "ZS-null", "ZS-full", "hyper-g-laplace", + "AIC", "hyper-g-n", "JZS") + gaussian_tcch <- c("CH", "tCCH", "TG","beta.prime", "intrinsic", "ZS-adapted", "uniform","Jeffreys", "benchmark", "robust") + gaussian_bas <- c("g-prior", "hyper-g", "EB-local","ZS-null", "ZS-full", "BIC", "hyper-g-laplace", "AIC", "EB-global", "hyper-g-n", "JZS") + + all_priors <- c(glm_and_gaussian_priors, gaussian_only_priors) + #browser() + # Validate prior_beta + if (!params_master$prior_beta %in% all_priors) { + stop(sprintf("Prior '%s' is not supported. Supported priors: %s", + params_master$prior_beta, paste(all_priors, collapse = ", "))) + } + + # Decision logic based on family and prior_beta + if (params_master$family %in% c("binomial", "poisson", "gamma")) { + if (params_master$prior_beta %in% gaussian_only_priors) { + stop(sprintf("Prior '%s' is not supported for GLM family '%s'. Supported GLM priors: %s", + params_master$prior_beta, params_master$family, + paste(c(glm_only_priors, glm_and_gaussian_priors), collapse = ", "))) + } + + params_nested$family <- params_master$family + if (is.null(params_master$laplace)) params_nested$laplace <- FALSE else params_nested$laplace <- params_master$laplace + + #if(params_nested$laplace) + # print("Laplace approximations will be used") + + if (params_master$prior_beta == "Jeffreys-BIC") { + if(params_nested$family == "binomial") + result <- logistic.loglik2(y, x, model, complex, params_nested) + else if(params_nested$family%in% c("poisson", "gamma")) + result <- glm.loglik2(y, x, model, complex, params_nested) + + } else { + params_nested$prior_beta <- switch( + params_master$prior_beta, + "beta.prime" = beta.prime(n = n), + "CH" = CCH(alpha = if (is.null(params_master$a)) stop("a must be provided") else params_master$a, + beta = if (is.null(params_master$b)) stop("b must be provided") else params_master$b, + s = if (is.null(params_master$s)) stop("s must be provided") else params_master$s), + "EB-local" = EB.local(), + "g-prior" = g.prior(g = params_master$g), + "hyper-g" = hyper.g(alpha = if (is.null(params_master$a)) stop("a must be provided") else params_master$a), + "tCCH" = tCCH(alpha = if (is.null(params_master$a)) stop("a must be provided") else params_master$a, + beta = if (is.null(params_master$b)) stop("b must be provided") else params_master$b, + s = if (is.null(params_master$s)) stop("s must be provided") else params_master$s, + r = if (is.null(params_master$rho)) stop("rho must be provided") else params_master$rho, + v = if (is.null(params_master$v)) stop("v must be provided") else params_master$v, + theta = if (is.null(params_master$k)) stop("k must be provided") else params_master$k), + "intrinsic" = intrinsic(n = params_master$n), + "TG" = TG(alpha = if (is.null(params_master$a)) stop("a must be provided") else params_master$a), + "Jeffreys" = Jeffreys(), + "uniform" = tCCH(alpha = 2, + beta = 2, + s = 0, + r = 0, + v = 1, + theta = 1), + "benchmark" = tCCH(alpha = 0.02, + beta = 0.02*max(n,p^2), + s = 0, + r = 0, + v = 1, + theta = 1), + "ZS-adapted" = tCCH(alpha = 1, + beta = 2, + s = n + 3, + r = 0, + v = 1, + theta = 1), + "TG" = TG(alpha = if (is.null(params_master$a)) stop("a must be provided") else params_master$a), + "robust" = robust(n = if (is.null(params_master$n)) as.numeric(n) else as.numeric(params_master$n)), + "hyper-g-n" = hyper.g.n(alpha = if (is.null(params_master$a)) 3 else params_master$a, + n = params_master$n), + "BIC" = bic.prior(n = if (is.null(params_master$n)) n else params_master$n), + stop("Unrecognized prior_beta for GLM: ", params_master$prior_beta) + ) + result <- glm.logpost.bas2(y, x, model, complex, params_nested) + } + } else if (params_master$family == "gaussian") { + + if (params_master$prior_beta %in% gaussian_not_robust) { + warning(sprintf("Prior '%s' is not reccomended supported for Gaussian family '%s'. Can be unstable for strong signals (R^2 > 0.9). Reccomended priors under Gaussian family: %s", + params_master$prior_beta, params_master$family, + paste(gaussian_robust, collapse = ", "))) + } + + params_nested$r <- params_master$r + + if (params_master$prior_beta %in% gaussian_tcch) { + + params_nested$prior_beta <- switch( + params_master$prior_beta, + "beta.prime" = list("beta.prime"), + "CH" = list("CH",a = if (is.null(params_master$a)) stop("a must be provided") else params_master$a, + b = if (is.null(params_master$b)) stop("b must be provided") else params_master$b, + s = if (is.null(params_master$s)) stop("s must be provided") else params_master$s), + "tCCH" = list("tCCH", a = if (is.null(params_master$a)) stop("a must be provided") else params_master$a, + b = if (is.null(params_master$b)) stop("b must be provided") else params_master$b, + s = if (is.null(params_master$s)) stop("s must be provided") else params_master$s, + rho = if (is.null(params_master$rho)) stop("rho must be provided") else params_master$rho, + v = if (is.null(params_master$v)) stop("v must be provided") else params_master$v, + k = if (is.null(params_master$k)) stop("k must be provided") else params_master$k), + "intrinsic" = list("intrinsic"), + "TG" = list("TG",a = if (is.null(params_master$a)) stop("a must be provided") else params_master$a, + s = if (is.null(params_master$s)) stop("s must be provided") else params_master$s), + "Jeffreys" = list("Jeffreys"), + "ZS-adapted" = list("ZS-adapted"), + "benchmark" = list("benchmark"), + "robust" = list("robust"), + "uniform" = list("uniform"), + stop("Unrecognized prior_beta for Gaussian GLM: ", params_master$prior_beta) + ) + result <- gaussian_tcch_log_likelihood2(y, x, model, complex, params_nested) + + }else if (params_master$prior_beta == "Jeffreys-BIC") { + if (is.null(params_master$var)) params_nested$var <- "unknown" else params_nested$var <- params_master$var + result <- gaussian.loglik2(y, x, model, complex, params_nested) + } else if (params_master$prior_beta %in% gaussian_bas) { + + params_nested$prior_beta <- switch( + params_master$prior_beta, + "g-prior" = 0, + "hyper-g" = 1, + "EB-local" = 2, + "BIC" = 3, + "ZS-null" = 4, + "ZS-full" = 5, + "hyper-g-laplace" = 6, + "AIC" = 7, + "EB-global" = 2, + "hyper-g-n" = 8, + "JZS" = 9 + ) + if(params_master$prior_beta == "g-prior") + { + if (!is.null(params_master$g)) params_nested$g <- params_master$g else stop("g must be provided") + result <- gaussian.loglik2.g(y, x, model, complex, params_nested) + } + else{ + if (!is.null(params_master$a)) params_nested$alpha <- params_master$a else params_nested$alpha = -1 + result <- lm.logpost.bas2(y, x, model, complex, params_nested) + } + + } else { + stop("Unexpected error in prior_beta logic for Gaussian.") + } + } else { + stop("Unsupported family: ", params_master$family, ". Supported families are 'binomial', 'poisson', 'gamma', or 'gaussian'.") + } + + # Ensure consistent return structure + if (is.null(result$crit) || is.null(result$coefs)) { + stop("Error in computation: Returned result does not contain 'crit' and 'coefs'.") + } + + return(list(crit = result$crit, coefs = result$coefs)) +} \ No newline at end of file -- GitLab From 3eb5b5b256371373ff72c593ea8c12fbdc7d56d0 Mon Sep 17 00:00:00 2001 From: aliaksah Date: Sat, 5 Apr 2025 14:34:19 +0200 Subject: [PATCH 16/36] tests added --- NAMESPACE | 8 +- R/likelihoods.R | 243 +------------------------------------ man/fbms.mlik.master.Rd | 2 +- man/fbms.mlik.master2.Rd | 73 ----------- man/glm.logpost.bas.Rd | 3 +- man/log_prior.Rd | 10 +- tests_current/some tests.R | 28 ++++- 7 files changed, 42 insertions(+), 325 deletions(-) delete mode 100644 man/fbms.mlik.master2.Rd diff --git a/NAMESPACE b/NAMESPACE index 6a49e38..fa98675 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -22,12 +22,15 @@ export(erf) export(exp_dbl) export(fbms) export(fbms.mlik.master) -export(fbms.mlik.master2) +export(fbms.mlik.master_old) export(gauss) export(gaussian.loglik) export(gaussian.loglik.alpha) export(gaussian.loglik.g) +export(gaussian.loglik2.alpha) +export(gaussian.loglik2.g) export(gaussian_tcch_log_likelihood) +export(gaussian_tcch_log_likelihood2) export(gelu) export(gen.params.gmjmcmc) export(gen.params.mjmcmc) @@ -45,6 +48,9 @@ export(log_prior) export(logistic.loglik) export(logistic.loglik.ala) export(logistic.loglik.alpha) +export(logistic.loglik2) +export(logistic.loglik2.ala) +export(logistic.loglik2.alpha) export(marginal.probs) export(merge_results) export(mjmcmc) diff --git a/R/likelihoods.R b/R/likelihoods.R index 802c3b2..91c7b8d 100644 --- a/R/likelihoods.R +++ b/R/likelihoods.R @@ -531,245 +531,6 @@ log_prior <- function (params, complex) { } -#' Master Log Marginal Likelihood Function -#' -#' This function serves as a unified interface to compute the log marginal likelihood -#' for different regression models and priors by calling specific log likelihood functions. -#' -#' @param y A numeric vector containing the dependent variable. -#' @param x A matrix containing the precalculated features (independent variables). -#' @param model A logical vector indicating which variables to include in the model. -#' @param complex A list of complexity measures for the features. -#' @param params A list of parameters controlling the model family, prior, and tuning parameters. -#' Key elements include: -#' - family: "binomial", "poisson", "gamma" (all three referred to as GLM below), or "gaussian" (default: "gaussian") -#' - prior_beta: Type of prior as a string (default: "g-prior"). Possible values include: -#' - "beta.prime": Beta-prime prior (GLM/Gaussian, no additional args) -#' - "CH": Compound Hypergeometric prior (GLM/Gaussian, requires `a`, `b`, optionally `s`) -#' - "EB-local": Empirical Bayes local prior (GLM/Gaussian, requires `a` for Gaussian) -#' - "EB-global": Empirical Bayes local prior (Gaussian, requires `a` for Gaussian) -#' - "g-prior": Zellner's g-prior (GLM/Gaussian, requires `g`) -#' - "hyper-g": Hyper-g prior (GLM/Gaussian, requires `a`) -#' - "hyper-g-n": Hyper-g/n prior (GLM/Gaussian, requires `a`) -#' - "tCCH": Truncated Compound Hypergeometric prior (GLM/Gaussian, requires `a`, `b`, `s`, `rho`, `v`, `k`) -#' - "intrinsic": Intrinsic prior (GLM/Gaussian, no additional args) -#' - "TG": Truncated Gamma prior (GLM/Gamma, requires `a`, `s`) -#' - "Jeffreys": Jeffreys prior (GLM/Gaussian, no additional args) -#' - "uniform": Uniform prior (GLM/Gaussian, no additional args) -#' - "benchmark": Benchmark prior (Gaussian/GLM, no additional args) -#' - "ZS-adapted": Zellner-Siow adapted prior (Gaussian TCCH, no additional args) -#' - "robust": Robust prior (Gaussian/GLM, no additional args) -#' - "Jeffreys-BIC": Jeffreys prior with BIC approximation of marginal likelihood (Gaussian/GLM) -#' - "ZS-null": Zellner-Siow null prior (Gaussian, requires `a`) -#' - "ZS-full": Zellner-Siow full prior (Gaussian, requires `a`) -#' - "hyper-g-laplace": Hyper-g Laplace prior (Gaussian, requires `a`) -#' - "AIC": AIC prior from BAS (Gaussian, requires penalty `a`) -#' - "BIC": BIC prior from BAS (Gaussian/GLM) -#' - "JZS": Jeffreys-Zellner-Siow prior (Gaussian, requires `a`) -#' - r: Model complexity penalty (default: 1/n) -#' - g: Tuning parameter for g-prior (default: max(n, p^2)) -#' - a, b, s, v, rho, k: Hyperparameters for various priors -#' - n: Sample size for some priors (default: length(y)) -#' - var: Variance assumption for Gaussian models ("known" or "unknown", default: "unknown") -#' - laplace: Logical for Laplace approximation in GLM only (default: FALSE) -#' -#' @return A list with elements: -#' \item{crit}{Log marginal likelihood combined with the log prior.} -#' \item{coefs}{Posterior mode of the coefficients.} -#' -#' @examples -#' fbms.mlik.master(rnorm(100), matrix(rnorm(100)), TRUE, list(oc = 1), list(family = "gaussian", prior_beta = "g-prior")) -#' -#' @importFrom BAS beta.prime bic.prior CCH EB.local g.prior hyper.g hyper.g.n tCCH intrinsic TG Jeffreys uniform -#' @export -fbms.mlik.master <- function(y, x, model, complex, params = list(family = "gaussian", prior_beta = "g-prior", r = exp(-0.5))) { - # Extract dimensions - n <- length(y) - p <- sum(model) - 1 # Number of predictors excluding intercept - - # Set default parameters if not fully specified - if (is.null(params$family)) params$family <- "gaussian" - if (is.null(params$prior_beta)) params$prior_beta <- "g-prior" - if (is.null(params$g)) params$g <- max(p^2, n) - if (is.null(params$n)) params$n <- n - if (is.null(params$r)) params$r <- 1/n - - # Ensure complex has oc if not provided, ignore by default - if (is.null(complex$oc)) complex$oc <- 0 - - # Homogenize and prepare params for nested calls - params_master <- params - params_nested <- list(r = params_master$r) - - # Define valid priors for each family - #glm_only_priors <- c("CCH", "tCCH", "TG") - glm_and_gaussian_priors <- c("CH", "tCCH", "TG","beta.prime", "EB-local", "g-prior", "hyper-g", "hyper-g-n", - "intrinsic", "ZS-adapted", "Jeffreys", "uniform", "benchmark", "robust", "Jeffreys-BIC") - gaussian_only_priors <- c("ZS-null", "ZS-full", "hyper-g-laplace","BIC", "AIC", "JZS","EB-global") - - #review a bit - gaussian_not_robust <- c("CH", "tCCH", "ZS-adapted", "TG","beta.prime", "benchmark","Jeffreys") - gaussian_robust <- c("g-prior", "hyper-g", "EB-local","BIC", "Jeffreys-BIC", "ZS-null", "ZS-full", "hyper-g-laplace", - "AIC", "hyper-g-n", "JZS") - gaussian_tcch <- c("CH", "tCCH", "TG","beta.prime", "intrinsic", "ZS-adapted", "uniform","Jeffreys", "benchmark", "robust") - gaussian_bas <- c("g-prior", "hyper-g", "EB-local","ZS-null", "ZS-full", "BIC", "hyper-g-laplace", "AIC", "EB-global", "hyper-g-n", "JZS") - - all_priors <- c(glm_and_gaussian_priors, gaussian_only_priors) - #browser() - # Validate prior_beta - if (!params_master$prior_beta %in% all_priors) { - stop(sprintf("Prior '%s' is not supported. Supported priors: %s", - params_master$prior_beta, paste(all_priors, collapse = ", "))) - } - - # Decision logic based on family and prior_beta - if (params_master$family %in% c("binomial", "poisson", "gamma")) { - if (params_master$prior_beta %in% gaussian_only_priors) { - stop(sprintf("Prior '%s' is not supported for GLM family '%s'. Supported GLM priors: %s", - params_master$prior_beta, params_master$family, - paste(c(glm_only_priors, glm_and_gaussian_priors), collapse = ", "))) - } - - params_nested$family <- params_master$family - if (is.null(params_master$laplace)) params_nested$laplace <- FALSE else params_nested$laplace <- params_master$laplace - - #if(params_nested$laplace) - # print("Laplace approximations will be used") - - if (params_master$prior_beta == "Jeffreys-BIC") { - if(params_nested$family == "binomial") - result <- logistic.loglik(y, x, model, complex, params_nested) - else if(params_nested$family%in% c("poisson", "gamma")) - result <- glm.loglik(y, x, model, complex, params_nested) - - } else { - params_nested$prior_beta <- switch( - params_master$prior_beta, - "beta.prime" = beta.prime(n = n), - "CH" = CCH(alpha = if (is.null(params_master$a)) stop("a must be provided") else params_master$a, - beta = if (is.null(params_master$b)) stop("b must be provided") else params_master$b, - s = if (is.null(params_master$s)) stop("s must be provided") else params_master$s), - "EB-local" = EB.local(), - "g-prior" = g.prior(g = params_master$g), - "hyper-g" = hyper.g(alpha = if (is.null(params_master$a)) stop("a must be provided") else params_master$a), - "tCCH" = tCCH(alpha = if (is.null(params_master$a)) stop("a must be provided") else params_master$a, - beta = if (is.null(params_master$b)) stop("b must be provided") else params_master$b, - s = if (is.null(params_master$s)) stop("s must be provided") else params_master$s, - r = if (is.null(params_master$rho)) stop("rho must be provided") else params_master$rho, - v = if (is.null(params_master$v)) stop("v must be provided") else params_master$v, - theta = if (is.null(params_master$k)) stop("k must be provided") else params_master$k), - "intrinsic" = intrinsic(n = params_master$n), - "TG" = TG(alpha = if (is.null(params_master$a)) stop("a must be provided") else params_master$a), - "Jeffreys" = Jeffreys(), - "uniform" = tCCH(alpha = 2, - beta = 2, - s = 0, - r = 0, - v = 1, - theta = 1), - "benchmark" = tCCH(alpha = 0.02, - beta = 0.02*max(n,p^2), - s = 0, - r = 0, - v = 1, - theta = 1), - "ZS-adapted" = tCCH(alpha = 1, - beta = 2, - s = n + 3, - r = 0, - v = 1, - theta = 1), - "TG" = TG(alpha = if (is.null(params_master$a)) stop("a must be provided") else params_master$a), - "robust" = robust(n = if (is.null(params_master$n)) as.numeric(n) else as.numeric(params_master$n)), - "hyper-g-n" = hyper.g.n(alpha = if (is.null(params_master$a)) 3 else params_master$a, - n = params_master$n), - "BIC" = bic.prior(n = if (is.null(params_master$n)) n else params_master$n), - stop("Unrecognized prior_beta for GLM: ", params_master$prior_beta) - ) - result <- glm.logpost.bas(y, x, model, complex, params_nested) - } - } else if (params_master$family == "gaussian") { - - if (params_master$prior_beta %in% gaussian_not_robust) { - warning(sprintf("Prior '%s' is not reccomended supported for Gaussian family '%s'. Can be unstable for strong signals (R^2 > 0.9). Reccomended priors under Gaussian family: %s", - params_master$prior_beta, params_master$family, - paste(gaussian_robust, collapse = ", "))) - } - - params_nested$r <- params_master$r - - if (params_master$prior_beta %in% gaussian_tcch) { - - params_nested$prior_beta <- switch( - params_master$prior_beta, - "beta.prime" = list("beta.prime"), - "CH" = list("CH",a = if (is.null(params_master$a)) stop("a must be provided") else params_master$a, - b = if (is.null(params_master$b)) stop("b must be provided") else params_master$b, - s = if (is.null(params_master$s)) stop("s must be provided") else params_master$s), - "tCCH" = list("tCCH", a = if (is.null(params_master$a)) stop("a must be provided") else params_master$a, - b = if (is.null(params_master$b)) stop("b must be provided") else params_master$b, - s = if (is.null(params_master$s)) stop("s must be provided") else params_master$s, - rho = if (is.null(params_master$rho)) stop("rho must be provided") else params_master$rho, - v = if (is.null(params_master$v)) stop("v must be provided") else params_master$v, - k = if (is.null(params_master$k)) stop("k must be provided") else params_master$k), - "intrinsic" = list("intrinsic"), - "TG" = list("TG",a = if (is.null(params_master$a)) stop("a must be provided") else params_master$a, - s = if (is.null(params_master$s)) stop("s must be provided") else params_master$s), - "Jeffreys" = list("Jeffreys"), - "ZS-adapted" = list("ZS-adapted"), - "benchmark" = list("benchmark"), - "robust" = list("robust"), - "uniform" = list("uniform"), - stop("Unrecognized prior_beta for Gaussian GLM: ", params_master$prior_beta) - ) - result <- gaussian_tcch_log_likelihood(y, x, model, complex, params_nested) - - }else if (params_master$prior_beta == "Jeffreys-BIC") { - if (is.null(params_master$var)) params_nested$var <- "unknown" else params_nested$var <- params_master$var - result <- gaussian.loglik(y, x, model, complex, params_nested) - } else if (params_master$prior_beta %in% gaussian_bas) { - - params_nested$prior_beta <- switch( - params_master$prior_beta, - "g-prior" = 0, - "hyper-g" = 1, - "EB-local" = 2, - "BIC" = 3, - "ZS-null" = 4, - "ZS-full" = 5, - "hyper-g-laplace" = 6, - "AIC" = 7, - "EB-global" = 2, - "hyper-g-n" = 8, - "JZS" = 9 - ) - if(params_master$prior_beta == "g-prior") - { - if (!is.null(params_master$g)) params_nested$g <- params_master$g else stop("g must be provided") - result <- gaussian.loglik.g(y, x, model, complex, params_nested) - } - else{ - if (!is.null(params_master$a)) params_nested$alpha <- params_master$a else params_nested$alpha = -1 - result <- lm.logpost.bas(y, x, model, complex, params_nested) - } - - } else { - stop("Unexpected error in prior_beta logic for Gaussian.") - } - } else { - stop("Unsupported family: ", params_master$family, ". Supported families are 'binomial', 'poisson', 'gamma', or 'gaussian'.") - } - - # Ensure consistent return structure - if (is.null(result$crit) || is.null(result$coefs)) { - stop("Error in computation: Returned result does not contain 'crit' and 'coefs'.") - } - - return(list(crit = result$crit, coefs = result$coefs)) -} - - - #' Master Log Marginal Likelihood Function @@ -819,11 +580,11 @@ fbms.mlik.master <- function(y, x, model, complex, params = list(family = "gauss #' \item{coefs}{Posterior mode of the coefficients.} #' #' @examples -#' fbms.mlik.master2(rnorm(100), matrix(rnorm(100)), TRUE, list(oc = 1), list(family = "gaussian", prior_beta = "g-prior")) +#' fbms.mlik.master(rnorm(100), matrix(rnorm(100)), TRUE, list(oc = 1), list(family = "gaussian", prior_beta = "g-prior")) #' #' @importFrom BAS beta.prime bic.prior CCH EB.local g.prior hyper.g hyper.g.n tCCH intrinsic TG Jeffreys uniform #' @export -fbms.mlik.master2 <- function(y, x, model, complex, params = list(family = "gaussian", beta_prior = list(type = "g-prior"), r = exp(-0.5))) { +fbms.mlik.master <- function(y, x, model, complex, params = list(family = "gaussian", beta_prior = list(type = "g-prior"), r = exp(-0.5))) { # Extract dimensions n <- length(y) p <- sum(model) - 1 # Number of predictors excluding intercept diff --git a/man/fbms.mlik.master.Rd b/man/fbms.mlik.master.Rd index 9ee5f71..f45a280 100644 --- a/man/fbms.mlik.master.Rd +++ b/man/fbms.mlik.master.Rd @@ -9,7 +9,7 @@ fbms.mlik.master( x, model, complex, - params = list(family = "gaussian", prior_beta = "g-prior", r = exp(-0.5)) + params = list(family = "gaussian", beta_prior = list(type = "g-prior"), r = exp(-0.5)) ) } \arguments{ diff --git a/man/fbms.mlik.master2.Rd b/man/fbms.mlik.master2.Rd deleted file mode 100644 index 71cad14..0000000 --- a/man/fbms.mlik.master2.Rd +++ /dev/null @@ -1,73 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/likelihoods.R -\name{fbms.mlik.master2} -\alias{fbms.mlik.master2} -\title{Master Log Marginal Likelihood Function} -\usage{ -fbms.mlik.master2( - y, - x, - model, - complex, - params = list(family = "gaussian", beta_prior = list(type = "g-prior"), r = exp(-0.5)) -) -} -\arguments{ -\item{y}{A numeric vector containing the dependent variable.} - -\item{x}{A matrix containing the precalculated features (independent variables).} - -\item{model}{A logical vector indicating which variables to include in the model.} - -\item{complex}{A list of complexity measures for the features.} - -\item{params}{A list of parameters controlling the model family, prior, and tuning parameters. -Key elements include: -\itemize{ -\item family: "binomial", "poisson", "gamma" (all three referred to as GLM below), or "gaussian" (default: "gaussian") -\item prior_beta: Type of prior as a string (default: "g-prior"). Possible values include: -\itemize{ -\item "beta.prime": Beta-prime prior (GLM/Gaussian, no additional args) -\item "CH": Compound Hypergeometric prior (GLM/Gaussian, requires \code{a}, \code{b}, optionally \code{s}) -\item "EB-local": Empirical Bayes local prior (GLM/Gaussian, requires \code{a} for Gaussian) -\item "EB-global": Empirical Bayes local prior (Gaussian, requires \code{a} for Gaussian) -\item "g-prior": Zellner's g-prior (GLM/Gaussian, requires \code{g}) -\item "hyper-g": Hyper-g prior (GLM/Gaussian, requires \code{a}) -\item "hyper-g-n": Hyper-g/n prior (GLM/Gaussian, requires \code{a}) -\item "tCCH": Truncated Compound Hypergeometric prior (GLM/Gaussian, requires \code{a}, \code{b}, \code{s}, \code{rho}, \code{v}, \code{k}) -\item "intrinsic": Intrinsic prior (GLM/Gaussian, no additional args) -\item "TG": Truncated Gamma prior (GLM/Gamma, requires \code{a}, \code{s}) -\item "Jeffreys": Jeffreys prior (GLM/Gaussian, no additional args) -\item "uniform": Uniform prior (GLM/Gaussian, no additional args) -\item "benchmark": Benchmark prior (Gaussian/GLM, no additional args) -\item "ZS-adapted": Zellner-Siow adapted prior (Gaussian TCCH, no additional args) -\item "robust": Robust prior (Gaussian/GLM, no additional args) -\item "Jeffreys-BIC": Jeffreys prior with BIC approximation of marginal likelihood (Gaussian/GLM) -\item "ZS-null": Zellner-Siow null prior (Gaussian, requires \code{a}) -\item "ZS-full": Zellner-Siow full prior (Gaussian, requires \code{a}) -\item "hyper-g-laplace": Hyper-g Laplace prior (Gaussian, requires \code{a}) -\item "AIC": AIC prior from BAS (Gaussian, requires penalty \code{a}) -\item "BIC": BIC prior from BAS (Gaussian/GLM) -\item "JZS": Jeffreys-Zellner-Siow prior (Gaussian, requires \code{a}) -} -\item r: Model complexity penalty (default: 1/n) -\item g: Tuning parameter for g-prior (default: max(n, p^2)) -\item a, b, s, v, rho, k: Hyperparameters for various priors -\item n: Sample size for some priors (default: length(y)) -\item var: Variance assumption for Gaussian models ("known" or "unknown", default: "unknown") -\item laplace: Logical for Laplace approximation in GLM only (default: FALSE) -}} -} -\value{ -A list with elements: -\item{crit}{Log marginal likelihood combined with the log prior.} -\item{coefs}{Posterior mode of the coefficients.} -} -\description{ -This function serves as a unified interface to compute the log marginal likelihood -for different regression models and priors by calling specific log likelihood functions. -} -\examples{ -fbms.mlik.master2(rnorm(100), matrix(rnorm(100)), TRUE, list(oc = 1), list(family = "gaussian", prior_beta = "g-prior")) - -} diff --git a/man/glm.logpost.bas.Rd b/man/glm.logpost.bas.Rd index d8930aa..11ae24c 100644 --- a/man/glm.logpost.bas.Rd +++ b/man/glm.logpost.bas.Rd @@ -11,8 +11,7 @@ glm.logpost.bas( x, model, complex, - params = list(r = exp(-0.5), family = "binomial", beta_prior = list(type = Jeffreys()), - laplace = FALSE) + params = list(r = NULL, family = "binomial", beta_prior = Jeffreys(), laplace = FALSE) ) } \arguments{ diff --git a/man/log_prior.Rd b/man/log_prior.Rd index 80a87a1..1fb5950 100644 --- a/man/log_prior.Rd +++ b/man/log_prior.Rd @@ -1,9 +1,11 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/likelihoods.R +% Please edit documentation in R/likelihoods.R, R/likelihoods2.R \name{log_prior} \alias{log_prior} \title{Log model prior function} \usage{ +log_prior(params, complex) + log_prior(params, complex) } \arguments{ @@ -12,12 +14,18 @@ log_prior(params, complex) \item{complex}{list of complexity measures of the features included into the model} } \value{ +A numeric with the log model prior. + A numeric with the log model prior. } \description{ +Log model prior function + Log model prior function } \examples{ log_prior(params = list(r=2), complex = list(oc = 2)) +log_prior(params = list(r=2), complex = list(oc = 2)) + } diff --git a/tests_current/some tests.R b/tests_current/some tests.R index 6e9a5df..cbc37a4 100644 --- a/tests_current/some tests.R +++ b/tests_current/some tests.R @@ -42,7 +42,7 @@ families <- c("gaussian", "binomial", "poisson", "gamma") # Required parameters for priors prior_params <- list( - "g-prior" = list(g = 10), + "g-prior" = list(g = 50,a = 50), "hyper-g" = list(a = 3), "hyper-g-n" = list(a = 3), "ZS-null" = list(a = 3), @@ -70,22 +70,38 @@ for (family in families) { params <- list(family = family,beta_prior = list(type = prior)) + params_old <- list(family = family,prior_beta = prior) + + # Add required parameters if applicable if (prior %in% names(prior_params)) { params$beta_prior <- c(params$beta_prior, prior_params[[prior]]) } + if (prior %in% names(prior_params)) { + params_old <- c(params_old, prior_params[[prior]]) + } + # Run the model tryCatch({ - result <- fbms.mlik.master2(data$y, data$x, model = c(T, rep(TRUE, ncol(data$x) - 1)), + + set.seed(1) + result <- fbms.mlik.master(data$y, data$x, model = c(T, rep(TRUE, ncol(data$x) - 1)), complex = list(oc = 1), params = params) - result.null <- fbms.mlik.master2(data$y, data$x, model = c(T, T, rep(FALSE, ncol(data$x) - 2)), - complex = list(oc = 1), params = params) + set.seed(1) + result.null <- fbms.mlik.master(data$y, data$x, model = c(T, T, rep(FALSE, ncol(data$x) - 2)), + complex = list(oc = 1), params = params) + set.seed(1) + result.old <- fbms.mlik.master_old(data$y, data$x, model = c(T, rep(TRUE, ncol(data$x) - 1)), + complex = list(oc = 1), params = params_old)# + set.seed(1) + result.null.old <- fbms.mlik.master_old(data$y, data$x, model = c(T, T, rep(FALSE, ncol(data$x) - 2)), + complex = list(oc = 1), params = params_old) - crit_rounded <- round(result$crit - result.null$crit, 8) - coefs_mean <- round(mean(result$coefs) - mean(result.null$coefs), 8) + crit_rounded <- round(result$crit - result.null$crit - result.old$crit + result.null.old$crit, 8) + coefs_mean <- round(mean(result$coefs) - mean(result.null$coefs) - mean(result.old$coefs) + mean(result.null.old$coefs), 8) cat(sprintf("\nPrior: %-15s -> crit: %8.4f, mean(coefs): %8.4f", prior, crit_rounded, coefs_mean)) -- GitLab From a1b09847510aba0d807191cf62d49e0f095d6db5 Mon Sep 17 00:00:00 2001 From: jonlachmann Date: Sat, 5 Apr 2025 14:45:45 +0200 Subject: [PATCH 17/36] * Make fbms tests pass. --- R/results.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/results.R b/R/results.R index d1f6f35..51c69a6 100644 --- a/R/results.R +++ b/R/results.R @@ -297,15 +297,15 @@ get.mpm.model <- function(result, y, x, labels = F, family = "gaussian", loglik. if (family == "binomial") loglik.pi <- logistic.loglik - if (is(result, "mjmcmc.parallel")) { - models <- unlist(lapply(result, function (x) x$models), recursive = FALSE) + if (is(result, "mjmcmc_parallel")) { + models <- unlist(lapply(result$chains, function (x) x$models), recursive = FALSE) marg.probs <- marginal.probs.renorm(models)$probs - features <- result[[1]]$populations + features <- result$chains[[1]]$populations } else if (is(result, "gmjmcmc")) { best_pop <- which.max(unlist(result$best.margs)) marg.probs <- result$marg.probs[[best_pop]] features <- result$populations[[best_pop]] - } else if (is(result, "gmjmcmc.parallel") || is(result, "mjmcmc")) { + } else if (is(result, "gmjmcmc_merged") || is(result, "mjmcmc")) { marg.probs <- result$marg.probs features <- result$features } -- GitLab From b144c306a1d6b6bc4952b747d686c4ced9c958ee Mon Sep 17 00:00:00 2001 From: aliaksah Date: Mon, 7 Apr 2025 14:15:21 +0200 Subject: [PATCH 18/36] select.mlpost.fun for g-prior case corrected --- R/fbms.R | 2 -- R/likelihoods.R | 2 -- 2 files changed, 4 deletions(-) diff --git a/R/fbms.R b/R/fbms.R index 99cf477..6fa07ec 100644 --- a/R/fbms.R +++ b/R/fbms.R @@ -348,8 +348,6 @@ select.mlpost.fun <- function (beta_prior, family) { return(gaussian_tcch_log_likelihood) } else if (beta_prior == "Jeffreys-BIC") { return(gaussian.loglik) - } else if (beta_prior == "g-prior") { - return(gaussian.loglik.g) } else if (beta_prior %in% gaussian_bas) { return(lm.logpost.bas) } diff --git a/R/likelihoods.R b/R/likelihoods.R index 91c7b8d..27a094e 100644 --- a/R/likelihoods.R +++ b/R/likelihoods.R @@ -531,8 +531,6 @@ log_prior <- function (params, complex) { } - - #' Master Log Marginal Likelihood Function #' #' This function serves as a unified interface to compute the log marginal likelihood -- GitLab From eeeabacd3d56800b16f863c6d16386c168b0fdb7 Mon Sep 17 00:00:00 2001 From: aliaksah Date: Mon, 7 Apr 2025 14:53:45 +0200 Subject: [PATCH 19/36] laplace = F set under glm priors in Jon's version if not specified. all priros are now consitent with my version. --- R/likelihoods.R | 4 ++++ R/likelihoods2.R | 1 + 2 files changed, 5 insertions(+) diff --git a/R/likelihoods.R b/R/likelihoods.R index 27a094e..b667a52 100644 --- a/R/likelihoods.R +++ b/R/likelihoods.R @@ -26,6 +26,8 @@ glm.logpost.bas <- function (y, x, model, complex, params = list(r = NULL, famil params <- list(r = 1 / dim(x)[1], family = "binomial", beta_prior = g.prior(max(dim(x)[1], sum(model) - 1)), laplace = FALSE) else if(length(params$r) == 0) params$r = 1 / dim(x)[1] + if(length(params$laplace) == 0) + params$laplace = FALSE p <- sum(model) - 1 if (p == 0) { probinit <- as.numeric(c(1, 0.99)) @@ -34,6 +36,7 @@ glm.logpost.bas <- function (y, x, model, complex, params = list(r = NULL, famil probinit <- as.numeric(c(1, rep(0.99, p))) } + mod <- NULL if (params$family == "binomial") @@ -43,6 +46,7 @@ glm.logpost.bas <- function (y, x, model, complex, params = list(r = NULL, famil else family_use <- Gamma() + tryCatch({ suppressWarnings({ mod <- .Call( BAS:::C_glm_deterministic, diff --git a/R/likelihoods2.R b/R/likelihoods2.R index ea8697a..16083f7 100644 --- a/R/likelihoods2.R +++ b/R/likelihoods2.R @@ -35,6 +35,7 @@ glm.logpost.bas2 <- function (y, x, model, complex, params = list(r = exp(-0.5), mod<-NULL + tryCatch({ if(params$family == "binomial") suppressWarnings({ -- GitLab From d09b76a704ed20ea21ad021449a3e67a26878a8e Mon Sep 17 00:00:00 2001 From: aliaksah Date: Mon, 7 Apr 2025 15:29:01 +0200 Subject: [PATCH 20/36] nan checks added to likelihoods --- R/likelihoods.R | 25 ++++++++++++++++++++----- R/likelihoods2.R | 5 +++++ 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/R/likelihoods.R b/R/likelihoods.R index b667a52..b37f8bf 100644 --- a/R/likelihoods.R +++ b/R/likelihoods.R @@ -70,8 +70,8 @@ glm.logpost.bas <- function (y, x, model, complex, params = list(r = NULL, famil cat("An error occurred:", conditionMessage(e), "\n") }) - if (length(mod) == 0) { - return(list(crit = -.Machine$double.xmax + log(params$r * sum(complex$oc)), coefs = rep(0, p + 1))) + if (length(mod) == 0 || is.nan(mod$logmarg[2])) { + return(list(crit = -.Machine$double.xmax + log_prior(params, complex), coefs = rep(0, p + 1))) } if (p == 0) { @@ -137,9 +137,9 @@ lm.logpost.bas <- function (y, x, model, complex, params = list(r = exp(-0.5), b # You can also print a message or log the error if needed cat("An error occurred:", conditionMessage(e), "\n") }) - #browser() - if (length(mod) == 0) { - return(list(crit = -.Machine$double.xmax + log(params$r * sum(complex$oc)), coefs = rep(0, p + 1))) + + if (length(mod) == 0 || is.nan(mod$logmarg[2])) { + return(list(crit = -.Machine$double.xmax + log_prior(params, complex), coefs = rep(0, p + 1))) } if (p == 0) { @@ -174,6 +174,12 @@ logistic.loglik <- function (y, x, model, complex, params = list(r = exp(-0.5))) else if(length(params$r) == 0) params$r = 1 / dim(x)[1] suppressWarnings({mod <- fastglm(as.matrix(x[, model]), y, family = binomial())}) + + if (length(mod) == 0 || is.nan(mod$deviance)) { + return(list(crit = -.Machine$double.xmax + log_prior(params, complex), coefs = rep(0,sum(model)))) + } + + ret <- (-(mod$deviance + log(length(y)) * (mod$rank - 1) - 2 * log(params$r) * sum(complex$oc))) / 2 return(list(crit = ret, coefs = mod$coefficients)) } @@ -211,6 +217,11 @@ glm.loglik <- function (y, x, model, complex, params = list(r = exp(-0.5), famil #browser() suppressWarnings({mod <- fastglm(as.matrix(x[, model]), y, family = fam)}) + + if (length(mod) == 0 || is.nan(mod$deviance)) { + return(list(crit = -.Machine$double.xmax + log_prior(params, complex), coefs = rep(0,sum(model)))) + } + ret <- (-(mod$deviance + log(length(y)) * (mod$rank - 1) - 2 * log(params$r) * sum(complex$oc))) / 2 return(list(crit = ret, coefs = mod$coefficients)) } @@ -449,6 +460,10 @@ gaussian_tcch_log_likelihood <- function(y, x, model, complex, params = list(r = lp <- log_prior(params, complex) + if (is.nan(marginal_likelihood)) { + return(list(crit = -.Machine$double.xmax + lp, coefs = rep(0,sum(model)))) + } + return(list(crit = marginal_likelihood + lp, coefs = fitted_model$coefficients)) } diff --git a/R/likelihoods2.R b/R/likelihoods2.R index 16083f7..eba3485 100644 --- a/R/likelihoods2.R +++ b/R/likelihoods2.R @@ -225,6 +225,11 @@ glm.loglik2 <- function (y, x, model, complex, params = list(r = exp(-0.5),famil } suppressWarnings({mod <- fastglm(as.matrix(x[, model]), y, family = fam)}) + + if (length(mod) == 0 || is.nan(mod$deviance)) { + return(list(crit = -.Machine$double.xmax + log_prior(params, complex), coefs = rep(0,sum(model)))) + } + ret <- (-(mod$deviance + log(length(y)) * (mod$rank - 1) - 2 * log(params$r) * sum(complex$oc))) / 2 return(list(crit=ret, coefs=mod$coefficients)) } -- GitLab From b7780f339b887098c29c04a54a662a532e684db8 Mon Sep 17 00:00:00 2001 From: aliaksah Date: Mon, 7 Apr 2025 16:42:21 +0200 Subject: [PATCH 21/36] docs and examples updated with x and y as per new requirenment --- R/arguments.R | 18 +++++++++++++--- R/diagnostics.R | 2 +- R/fbms.R | 47 +++++++++++++++++++++++++++++++++++------ R/gmjmcmc.R | 2 +- R/likelihoods.R | 8 +++---- R/likelihoods2.R | 4 ++-- R/mjmcmc.R | 2 +- R/parallel.R | 11 ++++------ R/results.R | 33 ++++++++++++----------------- R/summary.R | 13 ++++++------ man/compute_effects.Rd | 4 ++-- man/diagn_plot.Rd | 2 +- man/fbms.Rd | 43 ++++++++++++++++++++++++++++++++----- man/fbms.mlik.master.Rd | 4 ++-- 14 files changed, 132 insertions(+), 61 deletions(-) diff --git a/R/arguments.R b/R/arguments.R index 38b7037..93642b5 100644 --- a/R/arguments.R +++ b/R/arguments.R @@ -191,8 +191,13 @@ gen.params.mjmcmc <- function (data) { ### Create a list of parameters for the algorithm ## Get the dimensions of the data to set parameters based on it - ncov <- ncol(data$x) - data$fixed - + if(is.list(data) && length(data$fixed)>0 && length(data$x) > 0) + ncov <- ncol(data$x) - data$fixed #This is not ok as for fbms no one knows anything about fixed and $x! + else # I fixed that, but this is somewhat not elegant still + { + data.dim <- dim(data) + ncov <- data.dim[2] - 2 + } ## Local optimization parameters sa_kern <- list(probs=c(0.1, 0.05, 0.2, 0.3, 0.2, 0.15), neigh.size=1, neigh.min=1, neigh.max=2) # Simulated annealing proposal kernel parameters @@ -316,7 +321,14 @@ gen.params.gmjmcmc <- function (data) { # Get mjmcmc params params <- gen.params.mjmcmc(data) - ncov <- ncol(data$x) - data$fixed + ## Get the dimensions of the data to set parameters based on it + if(is.list(data) && length(data$fixed)>0 && length(data$x) > 0) + ncov <- ncol(data$x) - data$fixed #This is not ok as for fbms no one knows anything about fixed and $x! + else # I fixed that, but this is somewhat not elegant still + { + data.dim <- dim(data) + ncov <- data.dim[2] - 2 + } feat_params <- list(D = 5, L = 15, # Hard limits on feature complexity alpha = "unit", # alpha strategy ("unit" = None, "deep" strategy 3 from Hubin et al., "random" fully Bayesian strategy) diff --git a/R/diagnostics.R b/R/diagnostics.R index f27deb2..bf1e239 100644 --- a/R/diagnostics.R +++ b/R/diagnostics.R @@ -17,7 +17,7 @@ #' @return A list of summary statistics for checking convergence with given confidence intervals #' #' @examples -#' result <- gmjmcmc(matrix(rnorm(600), 100), P = 2, gaussian.loglik, NULL, c("p0", "exp_dbl")) +#' result <- gmjmcmc( y = matrix(rnorm(100), 100),x = matrix(rnorm(600), 100), P = 2, transforms = c("p0", "exp_dbl")) #' diagnstats <- diagn_plot(result) #' #' @export diff --git a/R/fbms.R b/R/fbms.R index 6fa07ec..98a53f2 100644 --- a/R/fbms.R +++ b/R/fbms.R @@ -5,7 +5,37 @@ #' family, data, transforms, and other parameters to customize the model. #' #' @param formula A formula object specifying the model structure. Default is NULL. -#' @param family The distribution family of the response variable. Currently supports "gaussian", "binomial" and "custom". Default is "gaussian". +#' @param family The distribution family of the response variable. Currently supports "gaussian", "binomial", "poisson", "gamma", and "custom". Default is "gaussian". +#' @param beta_prior Type of prior as a string (default: "g-prior" with a = max(n, p^2)). Possible values include: +#' - "beta.prime": Beta-prime prior (GLM/Gaussian, no additional args) +#' - "CH": Compound Hypergeometric prior (GLM/Gaussian, requires `a`, `b`, optionally `s`) +#' - "EB-local": Empirical Bayes local prior (GLM/Gaussian, requires `a` for Gaussian) +#' - "EB-global": Empirical Bayes local prior (Gaussian, requires `a` for Gaussian) +#' - "g-prior": Zellner's g-prior (GLM/Gaussian, requires `g`) +#' - "hyper-g": Hyper-g prior (GLM/Gaussian, requires `a`) +#' - "hyper-g-n": Hyper-g/n prior (GLM/Gaussian, requires `a`) +#' - "tCCH": Truncated Compound Hypergeometric prior (GLM/Gaussian, requires `a`, `b`, `s`, `rho`, `v`, `k`) +#' - "intrinsic": Intrinsic prior (GLM/Gaussian, no additional args) +#' - "TG": Truncated Gamma prior (GLM/Gamma, requires `a`, `s`) +#' - "Jeffreys": Jeffreys prior (GLM/Gaussian, no additional args) +#' - "uniform": Uniform prior (GLM/Gaussian, no additional args) +#' - "benchmark": Benchmark prior (Gaussian/GLM, no additional args) +#' - "ZS-adapted": Zellner-Siow adapted prior (Gaussian TCCH, no additional args) +#' - "robust": Robust prior (Gaussian/GLM, no additional args) +#' - "Jeffreys-BIC": Jeffreys prior with BIC approximation of marginal likelihood (Gaussian/GLM) +#' - "ZS-null": Zellner-Siow null prior (Gaussian, requires `a`) +#' - "ZS-full": Zellner-Siow full prior (Gaussian, requires `a`) +#' - "hyper-g-laplace": Hyper-g Laplace prior (Gaussian, requires `a`) +#' - "AIC": AIC prior from BAS (Gaussian, requires penalty `a`) +#' - "BIC": BIC prior from BAS (Gaussian/GLM) +#' - "JZS": Jeffreys-Zellner-Siow prior (Gaussian, requires `a`) +#' - r: Model complexity penalty (default: 1/n) +#' - g: Tuning parameter for g-prior (default: max(n, p^2)) +#' - a, b, s, v, rho, k: Hyperparameters for various priors +#' - n: Sample size for some priors (default: length(y)) +#' - var: Variance assumption for Gaussian models ("known" or "unknown", default: "unknown") +#' - laplace: Logical for Laplace approximation in GLM only (default: FALSE) +#' @param model_prior a list with parameters of model priors, by default r should be provided #' @param loglik.pi Custom function to compute the logarithm of the posterior mode based on logarithm of marginal likelihood and logarithm of prior functions (needs specification only used if family = "custom") #' @param data A data frame containing the variables in the model. If NULL, the variables are taken from the environment of the formula. Default is NULL. #' @param method Which fitting algorithm should be used, currently implemented options include "gmjmcmc", "gmjmcmc.parallel", "mjmcmc" and "mjmcmc.parallel" with "mjmcmc" being the default and 'mjmcmc' means that only linear models will be estimated @@ -28,7 +58,6 @@ #' cores = 1 #' ) #' summary(fbms_result) -#' plot(fbms_result) #' #' #' @seealso \code{\link{mjmcmc}}, \code{\link{gmjmcmc}}, \code{\link{gmjmcmc.parallel}} @@ -36,16 +65,22 @@ fbms <- function ( formula = NULL, family = "gaussian", - beta_prior = list(type = "g-prior", g = 5), - model_prior = list(r = exp(-0.5)), + beta_prior = list(type = "g-prior"), + model_prior = NULL, data = NULL, impute = FALSE, - loglik.pi = gaussian.loglik, + loglik.pi = NULL, method = "mjmcmc", verbose = TRUE, ... ) { - if (is.list(beta_prior) || is.list(model_prior)) { + + if(length(data) == 0) + stop("Training data must be provided!") + + if(length(model_prior) == 0) + model_prior = list(r = 1/dim(data)[1]) + if (family != "custom") { mlpost_params <- model_prior loglik.pi <- select.mlpost.fun(beta_prior$type, family) if(family == "gaussian") diff --git a/R/gmjmcmc.R b/R/gmjmcmc.R index 6b3b8e6..b66f65e 100644 --- a/R/gmjmcmc.R +++ b/R/gmjmcmc.R @@ -40,7 +40,7 @@ NULL #' \item{best}{Best marginal model probability throughout the run, represented as the maximum value in \code{unlist(best.margs)}.} #' #' @examples -#' result <- gmjmcmc(matrix(rnorm(600), 100), P = 2, gaussian.loglik, NULL, c("p0", "exp_dbl")) +#' result <- gmjmcmc(y = matrix(rnorm(100), 100),x = matrix(rnorm(600), 100), P = 2, transform = c("p0", "exp_dbl")) #' summary(result) #' plot(result) #' diff --git a/R/likelihoods.R b/R/likelihoods.R index b37f8bf..f645dc1 100644 --- a/R/likelihoods.R +++ b/R/likelihoods.R @@ -326,12 +326,12 @@ gaussian.loglik.g <- function (y, x, model, complex, params = NULL) { #' \item{coefs}{Posterior mode of the coefficients.} #' #' @examples -#' gaussian_tcch_log_likelihood(rnorm(100), matrix(rnorm(100)), TRUE, list(oc=1)) +#' gaussian_tcch_log_likelihood(rnorm(100), matrix(rnorm(100)), c(TRUE), list(oc=1)) #' #' @importFrom BAS phi1 hypergeometric1F1 hypergeometric2F1 #' @importFrom tolerance F1 #' @export -gaussian_tcch_log_likelihood <- function(y, x, model, complex, params = list(r = exp(-0.5), beta_prior = "intrinsic")) { +gaussian_tcch_log_likelihood <- function(y, x, model, complex, params = list(r = exp(-0.5), beta_prior = list(type = "intrinsic"))) { # Fit the linear model using fastglm fitted_model <- fastglm(as.matrix(x[, model]), y, family = gaussian()) log_likelihood <- -(fitted_model$aic -2 * (fitted_model$rank))/2 @@ -586,7 +586,7 @@ log_prior <- function (params, complex) { #' - "BIC": BIC prior from BAS (Gaussian/GLM) #' - "JZS": Jeffreys-Zellner-Siow prior (Gaussian, requires `a`) #' - r: Model complexity penalty (default: 1/n) -#' - g: Tuning parameter for g-prior (default: max(n, p^2)) +#' - a: Tuning parameter for g-prior (default: max(n, p^2)) #' - a, b, s, v, rho, k: Hyperparameters for various priors #' - n: Sample size for some priors (default: length(y)) #' - var: Variance assumption for Gaussian models ("known" or "unknown", default: "unknown") @@ -597,7 +597,7 @@ log_prior <- function (params, complex) { #' \item{coefs}{Posterior mode of the coefficients.} #' #' @examples -#' fbms.mlik.master(rnorm(100), matrix(rnorm(100)), TRUE, list(oc = 1), list(family = "gaussian", prior_beta = "g-prior")) +#' fbms.mlik.master(y = rnorm(100), x = matrix(rnorm(100)), c(TRUE,TRUE), list(oc = 1),params = list(family = "gaussian", beta_prior = list(type = "g-prior", a = 2), r = exp(-0.5))) #' #' @importFrom BAS beta.prime bic.prior CCH EB.local g.prior hyper.g hyper.g.n tCCH intrinsic TG Jeffreys uniform #' @export diff --git a/R/likelihoods2.R b/R/likelihoods2.R index eba3485..6a9f8cb 100644 --- a/R/likelihoods2.R +++ b/R/likelihoods2.R @@ -329,7 +329,7 @@ gaussian.loglik2.g <- function (y, x, model, complex, params = NULL) #' \item{coefs}{Posterior mode of the coefficients.} #' #' @examples -#' gaussian_tcch_log_likelihood(rnorm(100), matrix(rnorm(100)), TRUE, list(oc=1)) +#' gaussian_tcch_log_likelihood2(rnorm(100), matrix(rnorm(100)), TRUE, list(oc=1)) #' #' @importFrom BAS phi1 hypergeometric1F1 hypergeometric2F1 #' @importFrom tolerance F1 @@ -609,7 +609,7 @@ log_prior <- function (params, complex) { #' \item{coefs}{Posterior mode of the coefficients.} #' #' @examples -#' fbms.mlik.master(rnorm(100), matrix(rnorm(100)), TRUE, list(oc = 1), list(family = "gaussian", prior_beta = "g-prior")) +#' fbms.mlik.master(rnorm(100), matrix(rnorm(100)), c(TRUE,TRUE), list(oc = 1)) #' #' @importFrom BAS beta.prime bic.prior CCH EB.local g.prior hyper.g hyper.g.n tCCH intrinsic TG Jeffreys uniform #' @export diff --git a/R/mjmcmc.R b/R/mjmcmc.R index 12cdb2b..786b5aa 100644 --- a/R/mjmcmc.R +++ b/R/mjmcmc.R @@ -26,7 +26,7 @@ #' \item{populations}{The covariates represented as a list of features.} #' #' @examples -#' result <- mjmcmc(matrix(rnorm(600), 100), gaussian.loglik) +#' result <- mjmcmc(y = matrix(rnorm(100), 100),x = matrix(rnorm(600), 100), gaussian.loglik) #' summary(result) #' plot(result) #' diff --git a/R/parallel.R b/R/parallel.R index fa3eadc..b7a4401 100644 --- a/R/parallel.R +++ b/R/parallel.R @@ -88,7 +88,7 @@ rmclapply <- function(runs, args, fun, mc.cores = NULL) { #' @return Merged results from multiple mjmcmc runs #' #' @examples -#' result <- mjmcmc.parallel(runs = 1, cores = 1, matrix(rnorm(600), 100), gaussian.loglik) +#' result <- mjmcmc.parallel(runs = 1, cores = 1, y = matrix(rnorm(100), 100),x = matrix(rnorm(600), 100)) #' summary(result) #' plot(result) #' @@ -116,12 +116,9 @@ mjmcmc.parallel <- function(runs = 2, cores = getOption("mc.cores", 2L), ...) { #' result <- gmjmcmc.parallel( #' runs = 1, #' cores = 1, -#' list(populations = "best", complex.measure = 2, tol = 0.0000001), -#' matrix(rnorm(600), 100), -#' P = 2, -#' gaussian.loglik, -#' loglik.alpha = gaussian.loglik.alpha, -#' c("p0", "exp_dbl") +#' y = matrix(rnorm(100), 100), +#' x = matrix(rnorm(600), 100), +#' transforms = c("p0", "exp_dbl") #' ) #' #' summary(result) diff --git a/R/results.R b/R/results.R index 51c69a6..322ef7d 100644 --- a/R/results.R +++ b/R/results.R @@ -32,12 +32,9 @@ #' result <- gmjmcmc.parallel( #' runs = 1, #' cores = 1, -#' list(populations = "best", complex.measure = 2, tol = 0.0000001), -#' matrix(rnorm(600), 100), +#' y = matrix(rnorm(100), 100),x = matrix(rnorm(600), 100), #' P = 2, -#' gaussian.loglik, -#' loglik.alpha = gaussian.loglik.alpha, -#' c("p0", "exp_dbl") +#' transforms = c("p0", "exp_dbl") #' ) #' #' summary(result) @@ -226,7 +223,7 @@ population.weigths <- function (results, pops.use) { #' @return A character representation of a model #' #' @examples -#' result <- gmjmcmc(matrix(rnorm(600), 100), P = 2, gaussian.loglik, NULL, c("p0", "exp_dbl")) +#' result <- gmjmcmc(y = matrix(rnorm(100), 100),x = matrix(rnorm(600), 100), P = 2, transforms = c("p0", "exp_dbl")) #' summary(result) #' plot(result) #' model.string(c(TRUE, FALSE, TRUE, FALSE, TRUE), result$populations[[1]]) @@ -357,7 +354,7 @@ get.mpm.model <- function(result, y, x, labels = F, family = "gaussian", loglik. #' } #' #' @examples -#' result <- gmjmcmc(matrix(rnorm(600), 100), P = 2, gaussian.loglik, NULL, c("p0", "exp_dbl")) +#' result <- gmjmcmc(x = matrix(rnorm(600), 100),y = matrix(rnorm(100), 100), P = 2, transforms = c("p0", "exp_dbl")) #' get.best.model(result) #' #' @export @@ -422,7 +419,7 @@ get.best.model.mjmcmc <- function (result, labels) { #' @return A matrix of character representations of the features of a model. #' #' @examples -#' result <- gmjmcmc(matrix(rnorm(600), 100), P = 2, gaussian.loglik, NULL, c("p0", "exp_dbl")) +#' result <- gmjmcmc(y = matrix(rnorm(100), 100),x = matrix(rnorm(600), 100), P = 2, transforms = c("p0", "exp_dbl")) #' string.population(result$populations[[1]]) #' #' @export @@ -440,7 +437,7 @@ string.population <- function(x, round = 2) { #' @return A matrix of character representations of a list of models. #' #' @examples -#' result <- gmjmcmc(matrix(rnorm(600), 100), P = 2, gaussian.loglik, NULL, c("p0", "exp_dbl")) +#' result <- gmjmcmc(y = matrix(rnorm(100), 100),x = matrix(rnorm(600), 100), P = 2, transforms = c("p0", "exp_dbl")) #' string.population.models(result$populations[[2]], result$models[[2]]) #' #' @export @@ -461,7 +458,7 @@ string.population.models <- function(features, models, round = 2, link = "I") { #' @return No return value, just creates a plot #' #' @examples -#' result <- gmjmcmc(matrix(rnorm(600), 100), P = 2, gaussian.loglik, NULL, c("p0", "exp_dbl")) +#' result <- gmjmcmc(y = matrix(rnorm(100), 100),x = matrix(rnorm(600), 100), P = 2, transforms = c("p0", "exp_dbl")) #' plot(result) #' #' @@ -498,7 +495,7 @@ plot.gmjmcmc <- function (x, count = "all", pop = "best", tol = 0.0000001, data #' @return No return value, just creates a plot #' #' @examples -#' result <- mjmcmc(matrix(rnorm(600), 100), gaussian.loglik) +#' result <- mjmcmc(y = matrix(rnorm(100), 100),x = matrix(rnorm(600), 100), gaussian.loglik) #' plot(result) #' #' @export @@ -536,7 +533,7 @@ marg.prob.plot <- function (feats.strings, marg.probs, count = "all", ...) { #' @return No return value, just creates a plot #' #' @examples -#' result <- mjmcmc.parallel(runs = 1, cores = 1, matrix(rnorm(600), 100), gaussian.loglik) +#' result <- mjmcmc.parallel(runs = 1, cores = 1, y = matrix(rnorm(100), 100),x = matrix(rnorm(600), 100), gaussian.loglik) #' plot(result) #' #' @export @@ -576,12 +573,10 @@ run.weigths <- function (results) { #' result <- gmjmcmc.parallel( #' runs = 1, #' cores = 1, -#' list(populations = "best", complex.measure = 2, tol = 0.0000001), -#' matrix(rnorm(600), 100), +#' y = matrix(rnorm(100), 100), +#' x = matrix(rnorm(600), 100), #' P = 2, -#' gaussian.loglik, -#' loglik.alpha = gaussian.loglik.alpha, -#' c("p0", "exp_dbl") +#' transforms = c("p0", "exp_dbl") #' ) #' plot(result) #' @@ -613,8 +608,8 @@ plot.gmjmcmc_merged <- function (x, count = "all", pop = NULL,tol = 0.0000001, #' @examples #' #' data <- data.frame(matrix(rnorm(600), 100)) -#' result <- mjmcmc.parallel(runs = 2, cores = 1, data, gaussian.loglik) -#' compute_effects(result,labels = names(data)[-1]) +#' result <- mjmcmc.parallel(runs = 2, cores = 1, y = matrix(rnorm(100), 100),x = data, gaussian.loglik) +#' compute_effects(result,labels = names(data)) #' #' @seealso \code{\link{predict}} #' @export diff --git a/R/summary.R b/R/summary.R index ce3d6ab..3bcb876 100644 --- a/R/summary.R +++ b/R/summary.R @@ -14,7 +14,7 @@ #' \item{marg.probs}{Marginal probabilities corresponding to the ordered feature strings.} #' #' @examples -#' result <- gmjmcmc(matrix(rnorm(600), 100), P = 2, gaussian.loglik, NULL, c("p0", "exp_dbl")) +#' result <- gmjmcmc(y = matrix(rnorm(100), 100),x = matrix(rnorm(600), 100), P = 2, gaussian.loglik, NULL, c("p0", "exp_dbl")) #' summary(result, pop = "best") #' #' @export @@ -94,11 +94,10 @@ summary.gmjmcmc <- function (object, pop = "best", tol = 0.0001, labels = FALSE, #' runs = 1, #' cores = 1, #' list(populations = "best", complex.measure = 2, tol = 0.0000001), -#' matrix(rnorm(600), 100), +#' y = matrix(rnorm(100), 100), +#' x = matrix(rnorm(600), 100), #' P = 2, -#' gaussian.loglik, -#' loglik.alpha = gaussian.loglik.alpha, -#' c("p0", "exp_dbl") +#' transforms = c("p0", "exp_dbl") #' ) #' summary(result) #' @@ -150,7 +149,7 @@ summary.gmjmcmc_merged <- function (object, tol = 0.0001, labels = FALSE, effect #' \item{marg.probs}{Marginal probabilities corresponding to the ordered feature strings.} #' #' @examples -#' result <- mjmcmc(matrix(rnorm(600), 100), gaussian.loglik) +#' result <- mjmcmc( y = matrix(rnorm(100), 100),x = matrix(rnorm(600), 100), gaussian.loglik) #' summary(result) #' #' @export @@ -178,7 +177,7 @@ summary.mjmcmc <- function (object, tol = 0.0001, labels = FALSE, effects = NULL #' \item{marg.probs}{Marginal probabilities corresponding to the ordered feature strings.} #' #' @examples -#' result <- mjmcmc.parallel(runs = 1, cores = 1, matrix(rnorm(600), 100), gaussian.loglik) +#' result <- mjmcmc.parallel(runs = 1, cores = 1, y = matrix(rnorm(100), 100),x = matrix(rnorm(600), 100), gaussian.loglik) #' summary(result) #' #' @export diff --git a/man/compute_effects.Rd b/man/compute_effects.Rd index a010591..cf540bb 100644 --- a/man/compute_effects.Rd +++ b/man/compute_effects.Rd @@ -24,8 +24,8 @@ Users can provide custom labels and specify quantiles for the computation of eff \examples{ data <- data.frame(matrix(rnorm(600), 100)) -result <- mjmcmc.parallel(runs = 2, cores = 1, data, gaussian.loglik) -compute_effects(result,labels = names(data)[-1]) +result <- mjmcmc.parallel(runs = 2, cores = 1, y = matrix(rnorm(100), 100),x = data, gaussian.loglik) +compute_effects(result,labels = names(data)) } \seealso{ diff --git a/man/diagn_plot.Rd b/man/diagn_plot.Rd index 058797b..2298dcd 100644 --- a/man/diagn_plot.Rd +++ b/man/diagn_plot.Rd @@ -26,7 +26,7 @@ A list of summary statistics for checking convergence with given confidence inte Plot convergence of best/median/mean/other summary log posteriors in time } \examples{ -result <- gmjmcmc(matrix(rnorm(600), 100), P = 2, gaussian.loglik, NULL, c("p0", "exp_dbl")) +result <- gmjmcmc( y = matrix(rnorm(100), 100),x = matrix(rnorm(600), 100), P = 2, transforms = c("p0", "exp_dbl")) diagnstats <- diagn_plot(result) } diff --git a/man/fbms.Rd b/man/fbms.Rd index 185020e..210b1df 100644 --- a/man/fbms.Rd +++ b/man/fbms.Rd @@ -8,11 +8,11 @@ Or Fit a BGLM model using Modified Mode Jumping Markov Chain Monte Carlo (MCMC) fbms( formula = NULL, family = "gaussian", - beta_prior = list(type = "g-prior", g = 5), - model_prior = list(r = exp(-0.5)), + beta_prior = list(type = "g-prior"), + model_prior = NULL, data = NULL, impute = FALSE, - loglik.pi = gaussian.loglik, + loglik.pi = NULL, method = "mjmcmc", verbose = TRUE, ... @@ -21,7 +21,41 @@ fbms( \arguments{ \item{formula}{A formula object specifying the model structure. Default is NULL.} -\item{family}{The distribution family of the response variable. Currently supports "gaussian", "binomial" and "custom". Default is "gaussian".} +\item{family}{The distribution family of the response variable. Currently supports "gaussian", "binomial", "poisson", "gamma", and "custom". Default is "gaussian".} + +\item{beta_prior}{Type of prior as a string (default: "g-prior" with a = max(n, p^2)). Possible values include: +- "beta.prime": Beta-prime prior (GLM/Gaussian, no additional args) +- "CH": Compound Hypergeometric prior (GLM/Gaussian, requires \code{a}, \code{b}, optionally \code{s}) +- "EB-local": Empirical Bayes local prior (GLM/Gaussian, requires \code{a} for Gaussian) +- "EB-global": Empirical Bayes local prior (Gaussian, requires \code{a} for Gaussian) +- "g-prior": Zellner's g-prior (GLM/Gaussian, requires \code{g}) +- "hyper-g": Hyper-g prior (GLM/Gaussian, requires \code{a}) +- "hyper-g-n": Hyper-g/n prior (GLM/Gaussian, requires \code{a}) +- "tCCH": Truncated Compound Hypergeometric prior (GLM/Gaussian, requires \code{a}, \code{b}, \code{s}, \code{rho}, \code{v}, \code{k}) +- "intrinsic": Intrinsic prior (GLM/Gaussian, no additional args) +- "TG": Truncated Gamma prior (GLM/Gamma, requires \code{a}, \code{s}) +- "Jeffreys": Jeffreys prior (GLM/Gaussian, no additional args) +- "uniform": Uniform prior (GLM/Gaussian, no additional args) +- "benchmark": Benchmark prior (Gaussian/GLM, no additional args) +- "ZS-adapted": Zellner-Siow adapted prior (Gaussian TCCH, no additional args) +- "robust": Robust prior (Gaussian/GLM, no additional args) +- "Jeffreys-BIC": Jeffreys prior with BIC approximation of marginal likelihood (Gaussian/GLM) +- "ZS-null": Zellner-Siow null prior (Gaussian, requires \code{a}) +- "ZS-full": Zellner-Siow full prior (Gaussian, requires \code{a}) +- "hyper-g-laplace": Hyper-g Laplace prior (Gaussian, requires \code{a}) +- "AIC": AIC prior from BAS (Gaussian, requires penalty \code{a}) +- "BIC": BIC prior from BAS (Gaussian/GLM) +- "JZS": Jeffreys-Zellner-Siow prior (Gaussian, requires \code{a}) +\itemize{ +\item r: Model complexity penalty (default: 1/n) +\item g: Tuning parameter for g-prior (default: max(n, p^2)) +\item a, b, s, v, rho, k: Hyperparameters for various priors +\item n: Sample size for some priors (default: length(y)) +\item var: Variance assumption for Gaussian models ("known" or "unknown", default: "unknown") +\item laplace: Logical for Laplace approximation in GLM only (default: FALSE) +}} + +\item{model_prior}{a list with parameters of model priors, by default r should be provided} \item{data}{A data frame containing the variables in the model. If NULL, the variables are taken from the environment of the formula. Default is NULL.} @@ -55,7 +89,6 @@ fbms_result <- fbms( cores = 1 ) summary(fbms_result) -plot(fbms_result) } diff --git a/man/fbms.mlik.master.Rd b/man/fbms.mlik.master.Rd index f45a280..b6424f9 100644 --- a/man/fbms.mlik.master.Rd +++ b/man/fbms.mlik.master.Rd @@ -51,7 +51,7 @@ Key elements include: \item "JZS": Jeffreys-Zellner-Siow prior (Gaussian, requires \code{a}) } \item r: Model complexity penalty (default: 1/n) -\item g: Tuning parameter for g-prior (default: max(n, p^2)) +\item a: Tuning parameter for g-prior (default: max(n, p^2)) \item a, b, s, v, rho, k: Hyperparameters for various priors \item n: Sample size for some priors (default: length(y)) \item var: Variance assumption for Gaussian models ("known" or "unknown", default: "unknown") @@ -68,6 +68,6 @@ This function serves as a unified interface to compute the log marginal likeliho for different regression models and priors by calling specific log likelihood functions. } \examples{ -fbms.mlik.master(rnorm(100), matrix(rnorm(100)), TRUE, list(oc = 1), list(family = "gaussian", prior_beta = "g-prior")) +fbms.mlik.master(y = rnorm(100), x = matrix(rnorm(100)), c(TRUE,TRUE), list(oc = 1),params = list(family = "gaussian", beta_prior = list(type = "g-prior", a = 2), r = exp(-0.5))) } -- GitLab From 53da89eb38c77df8f47bb94cd1da40c4c90f690a Mon Sep 17 00:00:00 2001 From: aliaksah Date: Mon, 7 Apr 2025 22:41:21 +0200 Subject: [PATCH 22/36] docs updated --- man/fbms.mlik.master_old.Rd | 73 ++++++++++++++++++++++++++++ man/gaussian.loglik2.Rd | 30 ++++++++++++ man/gaussian.loglik2.alpha.Rd | 31 ++++++++++++ man/gaussian.loglik2.g.Rd | 29 +++++++++++ man/gaussian_tcch_log_likelihood.Rd | 4 +- man/gaussian_tcch_log_likelihood2.Rd | 37 ++++++++++++++ man/get.best.model.Rd | 2 +- man/glm.loglik2.Rd | 40 +++++++++++++++ man/glm.logpost.bas2.Rd | 40 +++++++++++++++ man/gmjmcmc.Rd | 2 +- man/gmjmcmc.parallel.Rd | 9 ++-- man/lm.logpost.bas2.Rd | 40 +++++++++++++++ man/logistic.loglik2.Rd | 34 +++++++++++++ man/logistic.loglik2.ala.Rd | 34 +++++++++++++ man/logistic.loglik2.alpha.Rd | 24 +++++++++ man/merge_results.Rd | 7 +-- man/mjmcmc.Rd | 2 +- man/mjmcmc.parallel.Rd | 2 +- man/model.string.Rd | 2 +- man/plot.gmjmcmc.Rd | 2 +- man/plot.gmjmcmc_merged.Rd | 8 ++- man/plot.mjmcmc.Rd | 2 +- man/plot.mjmcmc_parallel.Rd | 2 +- man/string.population.Rd | 2 +- man/string.population.models.Rd | 2 +- man/summary.gmjmcmc.Rd | 2 +- man/summary.gmjmcmc_merged.Rd | 7 ++- man/summary.mjmcmc.Rd | 2 +- man/summary.mjmcmc_parallel.Rd | 2 +- tests_current/some tests.R | 4 +- vignettes/FBMS-guide.Rmd | 46 ++---------------- 31 files changed, 443 insertions(+), 80 deletions(-) create mode 100644 man/fbms.mlik.master_old.Rd create mode 100644 man/gaussian.loglik2.Rd create mode 100644 man/gaussian.loglik2.alpha.Rd create mode 100644 man/gaussian.loglik2.g.Rd create mode 100644 man/gaussian_tcch_log_likelihood2.Rd create mode 100644 man/glm.loglik2.Rd create mode 100644 man/glm.logpost.bas2.Rd create mode 100644 man/lm.logpost.bas2.Rd create mode 100644 man/logistic.loglik2.Rd create mode 100644 man/logistic.loglik2.ala.Rd create mode 100644 man/logistic.loglik2.alpha.Rd diff --git a/man/fbms.mlik.master_old.Rd b/man/fbms.mlik.master_old.Rd new file mode 100644 index 0000000..f317e7f --- /dev/null +++ b/man/fbms.mlik.master_old.Rd @@ -0,0 +1,73 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/likelihoods2.R +\name{fbms.mlik.master_old} +\alias{fbms.mlik.master_old} +\title{Master Log Marginal Likelihood Function} +\usage{ +fbms.mlik.master_old( + y, + x, + model, + complex, + params = list(family = "gaussian", prior_beta = "g-prior", r = exp(-0.5)) +) +} +\arguments{ +\item{y}{A numeric vector containing the dependent variable.} + +\item{x}{A matrix containing the precalculated features (independent variables).} + +\item{model}{A logical vector indicating which variables to include in the model.} + +\item{complex}{A list of complexity measures for the features.} + +\item{params}{A list of parameters controlling the model family, prior, and tuning parameters. +Key elements include: +\itemize{ +\item family: "binomial", "poisson", "gamma" (all three referred to as GLM below), or "gaussian" (default: "gaussian") +\item prior_beta: Type of prior as a string (default: "g-prior"). Possible values include: +\itemize{ +\item "beta.prime": Beta-prime prior (GLM/Gaussian, no additional args) +\item "CH": Compound Hypergeometric prior (GLM/Gaussian, requires \code{a}, \code{b}, optionally \code{s}) +\item "EB-local": Empirical Bayes local prior (GLM/Gaussian, requires \code{a} for Gaussian) +\item "EB-global": Empirical Bayes local prior (Gaussian, requires \code{a} for Gaussian) +\item "g-prior": Zellner's g-prior (GLM/Gaussian, requires \code{g}) +\item "hyper-g": Hyper-g prior (GLM/Gaussian, requires \code{a}) +\item "hyper-g-n": Hyper-g/n prior (GLM/Gaussian, requires \code{a}) +\item "tCCH": Truncated Compound Hypergeometric prior (GLM/Gaussian, requires \code{a}, \code{b}, \code{s}, \code{rho}, \code{v}, \code{k}) +\item "intrinsic": Intrinsic prior (GLM/Gaussian, no additional args) +\item "TG": Truncated Gamma prior (GLM/Gamma, requires \code{a}, \code{s}) +\item "Jeffreys": Jeffreys prior (GLM/Gaussian, no additional args) +\item "uniform": Uniform prior (GLM/Gaussian, no additional args) +\item "benchmark": Benchmark prior (Gaussian/GLM, no additional args) +\item "ZS-adapted": Zellner-Siow adapted prior (Gaussian TCCH, no additional args) +\item "robust": Robust prior (Gaussian/GLM, no additional args) +\item "Jeffreys-BIC": Jeffreys prior with BIC approximation of marginal likelihood (Gaussian/GLM) +\item "ZS-null": Zellner-Siow null prior (Gaussian, requires \code{a}) +\item "ZS-full": Zellner-Siow full prior (Gaussian, requires \code{a}) +\item "hyper-g-laplace": Hyper-g Laplace prior (Gaussian, requires \code{a}) +\item "AIC": AIC prior from BAS (Gaussian, requires penalty \code{a}) +\item "BIC": BIC prior from BAS (Gaussian/GLM) +\item "JZS": Jeffreys-Zellner-Siow prior (Gaussian, requires \code{a}) +} +\item r: Model complexity penalty (default: 1/n) +\item g: Tuning parameter for g-prior (default: max(n, p^2)) +\item a, b, s, v, rho, k: Hyperparameters for various priors +\item n: Sample size for some priors (default: length(y)) +\item var: Variance assumption for Gaussian models ("known" or "unknown", default: "unknown") +\item laplace: Logical for Laplace approximation in GLM only (default: FALSE) +}} +} +\value{ +A list with elements: +\item{crit}{Log marginal likelihood combined with the log prior.} +\item{coefs}{Posterior mode of the coefficients.} +} +\description{ +This function serves as a unified interface to compute the log marginal likelihood +for different regression models and priors by calling specific log likelihood functions. +} +\examples{ +fbms.mlik.master(rnorm(100), matrix(rnorm(100)), c(TRUE,TRUE), list(oc = 1)) + +} diff --git a/man/gaussian.loglik2.Rd b/man/gaussian.loglik2.Rd new file mode 100644 index 0000000..4f19fc7 --- /dev/null +++ b/man/gaussian.loglik2.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/likelihoods2.R +\name{gaussian.loglik2} +\alias{gaussian.loglik2} +\title{Log likelihood function for gaussian regression with a Jeffreys prior and BIC approximation of MLIK with both known and unknown variance of the responses} +\usage{ +gaussian.loglik2(y, x, model, complex, params) +} +\arguments{ +\item{y}{A vector containing the dependent variable} + +\item{x}{The matrix containing the precalculated features} + +\item{model}{The model to estimate as a logical vector} + +\item{complex}{A list of complexity measures for the features} + +\item{params}{A list of parameters for the log likelihood, supplied by the user} +} +\value{ +A list with the log marginal likelihood combined with the log prior (crit) and the posterior mode of the coefficients (coefs). +} +\description{ +Log likelihood function for gaussian regression with a Jeffreys prior and BIC approximation of MLIK with both known and unknown variance of the responses +} +\examples{ +gaussian.loglik(rnorm(100), matrix(rnorm(100)), TRUE, list(oc = 1), NULL) + + +} diff --git a/man/gaussian.loglik2.alpha.Rd b/man/gaussian.loglik2.alpha.Rd new file mode 100644 index 0000000..cb9874e --- /dev/null +++ b/man/gaussian.loglik2.alpha.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/likelihoods2.R +\name{gaussian.loglik2.alpha} +\alias{gaussian.loglik2.alpha} +\title{Log likelihood function for gaussian regression for alpha calculation +This function is just the bare likelihood function +Note that it only gives a proportional value and is equivalent to least squares} +\usage{ +gaussian.loglik2.alpha(a, data, mu_func) +} +\arguments{ +\item{a}{A vector of the alphas to be used} + +\item{data}{The data to be used for calculation} + +\item{mu_func}{The function linking the mean to the covariates, +as a string with the alphas as a[i].} +} +\value{ +A numeric with the log likelihood. +} +\description{ +Log likelihood function for gaussian regression for alpha calculation +This function is just the bare likelihood function +Note that it only gives a proportional value and is equivalent to least squares +} +\examples{ +\dontrun{ +gaussian.loglik2.alpha(my_alpha,my_data,my_mu) +} +} diff --git a/man/gaussian.loglik2.g.Rd b/man/gaussian.loglik2.g.Rd new file mode 100644 index 0000000..85e32ea --- /dev/null +++ b/man/gaussian.loglik2.g.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/likelihoods2.R +\name{gaussian.loglik2.g} +\alias{gaussian.loglik2.g} +\title{Log likelihood function for linear regression using Zellners g-prior} +\usage{ +gaussian.loglik2.g(y, x, model, complex, params = NULL) +} +\arguments{ +\item{y}{A vector containing the dependent variable} + +\item{x}{The matrix containing the precalculated features} + +\item{model}{The model to estimate as a logical vector} + +\item{complex}{A list of complexity measures for the features} + +\item{params}{A list of parameters for the log likelihood, supplied by the user} +} +\value{ +A list with the log marginal likelihood combined with the log prior (crit) and the posterior mode of the coefficients (coefs). +} +\description{ +Log likelihood function for linear regression using Zellners g-prior +} +\examples{ +gaussian.loglik2.g(rnorm(100), matrix(rnorm(100)), TRUE, list(oc=1)) + +} diff --git a/man/gaussian_tcch_log_likelihood.Rd b/man/gaussian_tcch_log_likelihood.Rd index 15fe9cf..54c9516 100644 --- a/man/gaussian_tcch_log_likelihood.Rd +++ b/man/gaussian_tcch_log_likelihood.Rd @@ -9,7 +9,7 @@ gaussian_tcch_log_likelihood( x, model, complex, - params = list(r = exp(-0.5), beta_prior = "intrinsic") + params = list(r = exp(-0.5), beta_prior = list(type = "intrinsic")) ) } \arguments{ @@ -32,6 +32,6 @@ A list with elements: This function computes the marginal likelihood of a Gaussian regression model under different priors. } \examples{ -gaussian_tcch_log_likelihood(rnorm(100), matrix(rnorm(100)), TRUE, list(oc=1)) +gaussian_tcch_log_likelihood(rnorm(100), matrix(rnorm(100)), c(TRUE), list(oc=1)) } diff --git a/man/gaussian_tcch_log_likelihood2.Rd b/man/gaussian_tcch_log_likelihood2.Rd new file mode 100644 index 0000000..3c5ad15 --- /dev/null +++ b/man/gaussian_tcch_log_likelihood2.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/likelihoods2.R +\name{gaussian_tcch_log_likelihood2} +\alias{gaussian_tcch_log_likelihood2} +\title{Log likelihood function for Gaussian regression with parameter priors from BAS package} +\usage{ +gaussian_tcch_log_likelihood2( + y, + x, + model, + complex, + params = list(r = exp(-0.5), prior_beta = "intrinsic") +) +} +\arguments{ +\item{y}{A numeric vector containing the dependent variable.} + +\item{x}{A matrix containing the independent variables, including an intercept column.} + +\item{model}{A logical vector indicating which variables to include in the model.} + +\item{complex}{A list containing complexity measures for the features.} + +\item{params}{A list of parameters for the log likelihood, specifying the tuning parameters of beta priors.} +} +\value{ +A list with elements: +\item{crit}{Log marginal likelihood combined with the log prior.} +\item{coefs}{Posterior mode of the coefficients.} +} +\description{ +This function computes the marginal likelihood of a Gaussian regression model under different priors. +} +\examples{ +gaussian_tcch_log_likelihood2(rnorm(100), matrix(rnorm(100)), TRUE, list(oc=1)) + +} diff --git a/man/get.best.model.Rd b/man/get.best.model.Rd index 42d8f9f..356a775 100644 --- a/man/get.best.model.Rd +++ b/man/get.best.model.Rd @@ -37,7 +37,7 @@ The function identifies the best model by selecting the one with the highest \co } } \examples{ -result <- gmjmcmc(matrix(rnorm(600), 100), P = 2, gaussian.loglik, NULL, c("p0", "exp_dbl")) +result <- gmjmcmc(x = matrix(rnorm(600), 100),y = matrix(rnorm(100), 100), P = 2, transforms = c("p0", "exp_dbl")) get.best.model(result) } diff --git a/man/glm.loglik2.Rd b/man/glm.loglik2.Rd new file mode 100644 index 0000000..8a69870 --- /dev/null +++ b/man/glm.loglik2.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/likelihoods2.R +\name{glm.loglik2} +\alias{glm.loglik2} +\title{Log likelihood function for glm regression with a Jeffreys parameter prior and BIC approximations of the posterior +This function is created as an example of how to create an estimator that is used +to calculate the marginal likelihood of a model.} +\usage{ +glm.loglik2( + y, + x, + model, + complex, + params = list(r = exp(-0.5), family = "Gamma") +) +} +\arguments{ +\item{y}{A vector containing the dependent variable} + +\item{x}{The matrix containing the precalculated features} + +\item{model}{The model to estimate as a logical vector} + +\item{complex}{A list of complexity measures for the features} + +\item{params}{A list of parameters for the log likelihood, supplied by the user, family must be specified} +} +\value{ +A list with the log marginal likelihood combined with the log prior (crit) and the posterior mode of the coefficients (coefs). +} +\description{ +Log likelihood function for glm regression with a Jeffreys parameter prior and BIC approximations of the posterior +This function is created as an example of how to create an estimator that is used +to calculate the marginal likelihood of a model. +} +\examples{ +glm.loglik(abs(rnorm(100))+1, matrix(rnorm(100)), TRUE, list(oc = 1)) + + +} diff --git a/man/glm.logpost.bas2.Rd b/man/glm.logpost.bas2.Rd new file mode 100644 index 0000000..c079449 --- /dev/null +++ b/man/glm.logpost.bas2.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/likelihoods2.R +\name{glm.logpost.bas2} +\alias{glm.logpost.bas2} +\title{Log likelihood function for glm regression with parameter priors from BAS package +This function is created as an example of how to create an estimator that is used +to calculate the marginal likelihood of a model.} +\usage{ +glm.logpost.bas2( + y, + x, + model, + complex, + params = list(r = exp(-0.5), family = "binomial", prior_beta = Jeffreys(), laplace = + FALSE) +) +} +\arguments{ +\item{y}{A vector containing the dependent variable} + +\item{x}{The matrix containing the precalculated features} + +\item{model}{The model to estimate as a logical vector} + +\item{complex}{A list of complexity measures for the features} + +\item{params}{A list of parameters for the log likelihood, supplied by the user, important to specify the tuning parameters of beta priors and family that BAS uses in glm models} +} +\value{ +A list with the log marginal likelihood combined with the log prior (crit) and the posterior mode of the coefficients (coefs). +} +\description{ +Log likelihood function for glm regression with parameter priors from BAS package +This function is created as an example of how to create an estimator that is used +to calculate the marginal likelihood of a model. +} +\examples{ +glm.logpost.bas(as.integer(rnorm(100) > 0),cbind(1,matrix(rnorm(100))),c(TRUE,TRUE),list(oc = 1)) + +} diff --git a/man/gmjmcmc.Rd b/man/gmjmcmc.Rd index 8e6d661..06202d3 100644 --- a/man/gmjmcmc.Rd +++ b/man/gmjmcmc.Rd @@ -67,7 +67,7 @@ A list containing the following elements: Main algorithm for GMJMCMC (Genetically Modified MJMCMC) } \examples{ -result <- gmjmcmc(matrix(rnorm(600), 100), P = 2, gaussian.loglik, NULL, c("p0", "exp_dbl")) +result <- gmjmcmc(y = matrix(rnorm(100), 100),x = matrix(rnorm(600), 100), P = 2, transform = c("p0", "exp_dbl")) summary(result) plot(result) diff --git a/man/gmjmcmc.parallel.Rd b/man/gmjmcmc.parallel.Rd index ee0a167..2897678 100644 --- a/man/gmjmcmc.parallel.Rd +++ b/man/gmjmcmc.parallel.Rd @@ -43,12 +43,9 @@ Run multiple gmjmcmc (Genetically Modified MJMCMC) runs in parallel returning a result <- gmjmcmc.parallel( runs = 1, cores = 1, - list(populations = "best", complex.measure = 2, tol = 0.0000001), - matrix(rnorm(600), 100), - P = 2, - gaussian.loglik, - loglik.alpha = gaussian.loglik.alpha, - c("p0", "exp_dbl") + y = matrix(rnorm(100), 100), + x = matrix(rnorm(600), 100), + transforms = c("p0", "exp_dbl") ) summary(result) diff --git a/man/lm.logpost.bas2.Rd b/man/lm.logpost.bas2.Rd new file mode 100644 index 0000000..e04283e --- /dev/null +++ b/man/lm.logpost.bas2.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/likelihoods2.R +\name{lm.logpost.bas2} +\alias{lm.logpost.bas2} +\title{Log likelihood function for Gaussian regression with parameter priors from BAS package +This function is created as an example of how to create an estimator that is used +to calculate the marginal likelihood of a model.} +\usage{ +lm.logpost.bas2( + y, + x, + model, + complex, + params = list(r = exp(-0.5), prior_beta = "g-prior", alpha = 4) +) +} +\arguments{ +\item{y}{A vector containing the dependent variable} + +\item{x}{The matrix containing the precalculated features} + +\item{model}{The model to estimate as a logical vector} + +\item{complex}{A list of complexity measures for the features} + +\item{params}{A list of parameters for the log likelihood, supplied by the user, important to specify the tuning parameters of beta priors where the corresponding integers as prior_beta must be provided "g-prior" = 0, "hyper-g" = 1, "EB-local" = 2, "BIC" = 3, "ZS-null" = 4, "ZS-full" = 5, "hyper-g-laplace" = 6, "AIC" = 7, "EB-global" = 2, "hyper-g-n" = 8, "JZS" = 9 and in Gaussian models} +} +\value{ +A list with the log marginal likelihood combined with the log prior (crit) and the posterior mode of the coefficients (coefs). +} +\description{ +Log likelihood function for Gaussian regression with parameter priors from BAS package +This function is created as an example of how to create an estimator that is used +to calculate the marginal likelihood of a model. +} +\examples{ +lm.logpost.bas(rnorm(100), cbind(1,matrix(rnorm(100))), c(TRUE,TRUE), list(oc = 1)) + + +} diff --git a/man/logistic.loglik2.Rd b/man/logistic.loglik2.Rd new file mode 100644 index 0000000..c9cc3c8 --- /dev/null +++ b/man/logistic.loglik2.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/likelihoods2.R +\name{logistic.loglik2} +\alias{logistic.loglik2} +\title{Log likelihood function for logistic regression with a Jeffreys parameter prior and BIC approximations of the posterior +This function is created as an example of how to create an estimator that is used +to calculate the marginal likelihood of a model.} +\usage{ +logistic.loglik2(y, x, model, complex, params = list(r = exp(-0.5))) +} +\arguments{ +\item{y}{A vector containing the dependent variable} + +\item{x}{The matrix containing the precalculated features} + +\item{model}{The model to estimate as a logical vector} + +\item{complex}{A list of complexity measures for the features} + +\item{params}{A list of parameters for the log likelihood, supplied by the user} +} +\value{ +A list with the log marginal likelihood combined with the log prior (crit) and the posterior mode of the coefficients (coefs). +} +\description{ +Log likelihood function for logistic regression with a Jeffreys parameter prior and BIC approximations of the posterior +This function is created as an example of how to create an estimator that is used +to calculate the marginal likelihood of a model. +} +\examples{ +logistic.loglik2(as.integer(rnorm(100) > 0), matrix(rnorm(100)), TRUE, list(oc = 1)) + + +} diff --git a/man/logistic.loglik2.ala.Rd b/man/logistic.loglik2.ala.Rd new file mode 100644 index 0000000..190bd2f --- /dev/null +++ b/man/logistic.loglik2.ala.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/likelihoods2.R +\name{logistic.loglik2.ala} +\alias{logistic.loglik2.ala} +\title{Log likelihood function for logistic regression with an approximate Laplace approximations used +This function is created as an example of how to create an estimator that is used +to calculate the marginal likelihood of a model.} +\usage{ +logistic.loglik2.ala(y, x, model, complex, params = list(r = exp(-0.5))) +} +\arguments{ +\item{y}{A vector containing the dependent variable} + +\item{x}{The matrix containing the precalculated features} + +\item{model}{The model to estimate as a logical vector} + +\item{complex}{A list of complexity measures for the features} + +\item{params}{A list of parameters for the log likelihood, supplied by the user} +} +\value{ +A list with the log marginal likelihood combined with the log prior (crit) and the posterior mode of the coefficients (coefs). +} +\description{ +Log likelihood function for logistic regression with an approximate Laplace approximations used +This function is created as an example of how to create an estimator that is used +to calculate the marginal likelihood of a model. +} +\examples{ +logistic.loglik2.ala(as.integer(rnorm(100) > 0), matrix(rnorm(100)), TRUE, list(oc = 1)) + + +} diff --git a/man/logistic.loglik2.alpha.Rd b/man/logistic.loglik2.alpha.Rd new file mode 100644 index 0000000..56caadc --- /dev/null +++ b/man/logistic.loglik2.alpha.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/likelihoods2.R +\name{logistic.loglik2.alpha} +\alias{logistic.loglik2.alpha} +\title{Log likelihood function for logistic regression for alpha calculation +This function is just the bare likelihood function} +\usage{ +logistic.loglik2.alpha(a, data, mu_func) +} +\arguments{ +\item{a}{A vector of the alphas to be used} + +\item{data}{The data to be used for calculation} + +\item{mu_func}{The function linking the mean to the covariates, +as a string with the alphas as a[i].} +} +\value{ +A numeric with the log likelihood. +} +\description{ +Log likelihood function for logistic regression for alpha calculation +This function is just the bare likelihood function +} diff --git a/man/merge_results.Rd b/man/merge_results.Rd index c0a89ab..32c10f6 100644 --- a/man/merge_results.Rd +++ b/man/merge_results.Rd @@ -50,12 +50,9 @@ and merge the results together, simplifying by merging equivalent features (havi result <- gmjmcmc.parallel( runs = 1, cores = 1, - list(populations = "best", complex.measure = 2, tol = 0.0000001), - matrix(rnorm(600), 100), + y = matrix(rnorm(100), 100),x = matrix(rnorm(600), 100), P = 2, - gaussian.loglik, - loglik.alpha = gaussian.loglik.alpha, - c("p0", "exp_dbl") + transforms = c("p0", "exp_dbl") ) summary(result) diff --git a/man/mjmcmc.Rd b/man/mjmcmc.Rd index d2cff5e..ac4b908 100644 --- a/man/mjmcmc.Rd +++ b/man/mjmcmc.Rd @@ -50,7 +50,7 @@ A list containing the following elements: Main algorithm for MJMCMC (Genetically Modified MJMCMC) } \examples{ -result <- mjmcmc(matrix(rnorm(600), 100), gaussian.loglik) +result <- mjmcmc(y = matrix(rnorm(100), 100),x = matrix(rnorm(600), 100), gaussian.loglik) summary(result) plot(result) diff --git a/man/mjmcmc.parallel.Rd b/man/mjmcmc.parallel.Rd index 756ce4f..f4275a0 100644 --- a/man/mjmcmc.parallel.Rd +++ b/man/mjmcmc.parallel.Rd @@ -20,7 +20,7 @@ Merged results from multiple mjmcmc runs Run multiple mjmcmc runs in parallel, merging the results before returning. } \examples{ -result <- mjmcmc.parallel(runs = 1, cores = 1, matrix(rnorm(600), 100), gaussian.loglik) +result <- mjmcmc.parallel(runs = 1, cores = 1, y = matrix(rnorm(100), 100),x = matrix(rnorm(600), 100)) summary(result) plot(result) diff --git a/man/model.string.Rd b/man/model.string.Rd index 7a3e8be..0f4a158 100644 --- a/man/model.string.Rd +++ b/man/model.string.Rd @@ -22,7 +22,7 @@ A character representation of a model Function to generate a function string for a model consisting of features } \examples{ -result <- gmjmcmc(matrix(rnorm(600), 100), P = 2, gaussian.loglik, NULL, c("p0", "exp_dbl")) +result <- gmjmcmc(y = matrix(rnorm(100), 100),x = matrix(rnorm(600), 100), P = 2, transforms = c("p0", "exp_dbl")) summary(result) plot(result) model.string(c(TRUE, FALSE, TRUE, FALSE, TRUE), result$populations[[1]]) diff --git a/man/plot.gmjmcmc.Rd b/man/plot.gmjmcmc.Rd index ff2fd5d..f019a0c 100644 --- a/man/plot.gmjmcmc.Rd +++ b/man/plot.gmjmcmc.Rd @@ -28,7 +28,7 @@ Function to plot the results, works both for results from gmjmcmc and merged results from merge.results } \examples{ -result <- gmjmcmc(matrix(rnorm(600), 100), P = 2, gaussian.loglik, NULL, c("p0", "exp_dbl")) +result <- gmjmcmc(y = matrix(rnorm(100), 100),x = matrix(rnorm(600), 100), P = 2, transforms = c("p0", "exp_dbl")) plot(result) diff --git a/man/plot.gmjmcmc_merged.Rd b/man/plot.gmjmcmc_merged.Rd index f2aa5c6..717fa76 100644 --- a/man/plot.gmjmcmc_merged.Rd +++ b/man/plot.gmjmcmc_merged.Rd @@ -29,12 +29,10 @@ Plot a gmjmcmc_merged run result <- gmjmcmc.parallel( runs = 1, cores = 1, - list(populations = "best", complex.measure = 2, tol = 0.0000001), - matrix(rnorm(600), 100), + y = matrix(rnorm(100), 100), + x = matrix(rnorm(600), 100), P = 2, - gaussian.loglik, - loglik.alpha = gaussian.loglik.alpha, - c("p0", "exp_dbl") + transforms = c("p0", "exp_dbl") ) plot(result) diff --git a/man/plot.mjmcmc.Rd b/man/plot.mjmcmc.Rd index 050e495..6997792 100644 --- a/man/plot.mjmcmc.Rd +++ b/man/plot.mjmcmc.Rd @@ -22,7 +22,7 @@ Function to plot the results, works both for results from gmjmcmc and merged results from merge.results } \examples{ -result <- mjmcmc(matrix(rnorm(600), 100), gaussian.loglik) +result <- mjmcmc(y = matrix(rnorm(100), 100),x = matrix(rnorm(600), 100), gaussian.loglik) plot(result) } diff --git a/man/plot.mjmcmc_parallel.Rd b/man/plot.mjmcmc_parallel.Rd index 0a11c92..81c1c5e 100644 --- a/man/plot.mjmcmc_parallel.Rd +++ b/man/plot.mjmcmc_parallel.Rd @@ -20,7 +20,7 @@ No return value, just creates a plot Plot a mjmcmc_parallel run } \examples{ -result <- mjmcmc.parallel(runs = 1, cores = 1, matrix(rnorm(600), 100), gaussian.loglik) +result <- mjmcmc.parallel(runs = 1, cores = 1, y = matrix(rnorm(100), 100),x = matrix(rnorm(600), 100), gaussian.loglik) plot(result) } diff --git a/man/string.population.Rd b/man/string.population.Rd index 86c6d81..9ff8868 100644 --- a/man/string.population.Rd +++ b/man/string.population.Rd @@ -18,7 +18,7 @@ A matrix of character representations of the features of a model. Function to get a character representation of a list of features } \examples{ -result <- gmjmcmc(matrix(rnorm(600), 100), P = 2, gaussian.loglik, NULL, c("p0", "exp_dbl")) +result <- gmjmcmc(y = matrix(rnorm(100), 100),x = matrix(rnorm(600), 100), P = 2, transforms = c("p0", "exp_dbl")) string.population(result$populations[[1]]) } diff --git a/man/string.population.models.Rd b/man/string.population.models.Rd index 5c6c70e..492b886 100644 --- a/man/string.population.models.Rd +++ b/man/string.population.models.Rd @@ -22,7 +22,7 @@ A matrix of character representations of a list of models. Function to get a character representation of a list of models } \examples{ -result <- gmjmcmc(matrix(rnorm(600), 100), P = 2, gaussian.loglik, NULL, c("p0", "exp_dbl")) +result <- gmjmcmc(y = matrix(rnorm(100), 100),x = matrix(rnorm(600), 100), P = 2, transforms = c("p0", "exp_dbl")) string.population.models(result$populations[[2]], result$models[[2]]) } diff --git a/man/summary.gmjmcmc.Rd b/man/summary.gmjmcmc.Rd index ca112c7..1e6d859 100644 --- a/man/summary.gmjmcmc.Rd +++ b/man/summary.gmjmcmc.Rd @@ -41,7 +41,7 @@ A data frame containing the following columns: Function to print a quick summary of the results } \examples{ -result <- gmjmcmc(matrix(rnorm(600), 100), P = 2, gaussian.loglik, NULL, c("p0", "exp_dbl")) +result <- gmjmcmc(y = matrix(rnorm(100), 100),x = matrix(rnorm(600), 100), P = 2, gaussian.loglik, NULL, c("p0", "exp_dbl")) summary(result, pop = "best") } diff --git a/man/summary.gmjmcmc_merged.Rd b/man/summary.gmjmcmc_merged.Rd index b979eb9..351a9f4 100644 --- a/man/summary.gmjmcmc_merged.Rd +++ b/man/summary.gmjmcmc_merged.Rd @@ -45,11 +45,10 @@ result <- gmjmcmc.parallel( runs = 1, cores = 1, list(populations = "best", complex.measure = 2, tol = 0.0000001), - matrix(rnorm(600), 100), + y = matrix(rnorm(100), 100), + x = matrix(rnorm(600), 100), P = 2, - gaussian.loglik, - loglik.alpha = gaussian.loglik.alpha, - c("p0", "exp_dbl") + transforms = c("p0", "exp_dbl") ) summary(result) diff --git a/man/summary.mjmcmc.Rd b/man/summary.mjmcmc.Rd index fde06a6..70dcf96 100644 --- a/man/summary.mjmcmc.Rd +++ b/man/summary.mjmcmc.Rd @@ -35,7 +35,7 @@ A data frame containing the following columns: Function to print a quick summary of the results } \examples{ -result <- mjmcmc(matrix(rnorm(600), 100), gaussian.loglik) +result <- mjmcmc( y = matrix(rnorm(100), 100),x = matrix(rnorm(600), 100), gaussian.loglik) summary(result) } diff --git a/man/summary.mjmcmc_parallel.Rd b/man/summary.mjmcmc_parallel.Rd index fad3f4a..23250d6 100644 --- a/man/summary.mjmcmc_parallel.Rd +++ b/man/summary.mjmcmc_parallel.Rd @@ -35,7 +35,7 @@ A data frame containing the following columns: Function to print a quick summary of the results } \examples{ -result <- mjmcmc.parallel(runs = 1, cores = 1, matrix(rnorm(600), 100), gaussian.loglik) +result <- mjmcmc.parallel(runs = 1, cores = 1, y = matrix(rnorm(100), 100),x = matrix(rnorm(600), 100), gaussian.loglik) summary(result) } diff --git a/tests_current/some tests.R b/tests_current/some tests.R index cbc37a4..3513181 100644 --- a/tests_current/some tests.R +++ b/tests_current/some tests.R @@ -20,7 +20,7 @@ gen_data <- function(family) { y <- rpois(n, lambda) } else if (family == "gamma") { shape <- 2 - rate <- exp(-x %*% beta + rnorm(n, mean = 0, sd = 1)) + rate <- exp(-x %*% beta + rnorm(n, mean = 0, sd = 0.5)) y <- rgamma(n, shape = shape, rate = rate) } else { stop("Unsupported family") @@ -42,7 +42,7 @@ families <- c("gaussian", "binomial", "poisson", "gamma") # Required parameters for priors prior_params <- list( - "g-prior" = list(g = 50,a = 50), + "g-prior" = list(g = 10,a = 10), "hyper-g" = list(a = 3), "hyper-g-n" = list(a = 3), "ZS-null" = list(a = 3), diff --git a/vignettes/FBMS-guide.Rmd b/vignettes/FBMS-guide.Rmd index 91c2bcc..af1a952 100644 --- a/vignettes/FBMS-guide.Rmd +++ b/vignettes/FBMS-guide.Rmd @@ -7,8 +7,8 @@ vignette: > %\VignetteEncoding{UTF-8} %\VignetteDepends{fastglm, FBMS} --- -The `FBMS` package provides functions for Flexible Bayesian Model Selection and Model Averaging. +The `FBMS` package provides functions for Flexible Bayesian Model Selection and Model Averaging. ```{r, include = FALSE} knitr::opts_chunk$set( @@ -16,47 +16,7 @@ knitr::opts_chunk$set( comment = "#>" ) ``` -# Examples -Below are provided examples on how to run the algorithm and what the results tell us, we begin by loading the package and a supplied dataset -```{r setup} -library(FBMS) -#library(GenSA) -#library(fastglm) -data("breastcancer") -bc <- breastcancer[,c(ncol(breastcancer),2:(ncol(breastcancer)-1))] -bc <- cbind(bc[, 1], 1, bc[, -1]) -``` -We need some nonlinear transformations for the algorithm to use, the package offers a selection of these, but you are also able to define your own. Here we create a list of the ones to use, all but one are supplied by the package. -```{r} -to3 <- function(x) x^3 -transforms <- c("sigmoid","sin_deg","exp_dbl","p0","troot","to3") -``` -By calling two functions in the package, a list of probabilities for various parts of the algorithm, as well as a list of parameters are created. The list of probabilities needs the list of transformations to be able to create the vector of probabilities for the different transformations -```{r} -probs <- gen.probs.gmjmcmc(transforms) -params <- gen.params.gmjmcmc(bc) -``` -We can use one of the supplied likelihood functions, but here we demonstrate how to create our own, it takes four arguments, the dependent $y$ variable, the matrix $X$ containing all independent variables, the model as a logical vector specifying the columns of $X$, and a list of complexity measures for the features involved in the model -```{r} -loglik.example <- function (y, x, model, complex, params) { - r <- 20/223 - suppressWarnings({mod <- fastglm(as.matrix(x[,model]), y, family=binomial())}) - ret <- (-(mod$deviance -2*log(r)*sum(complex$width)))/2 - return(list(crit=ret, coefs=mod$coefficients)) -} -``` -To be able to calculate the alphas when using for example strategy 3 as per Hubin et al., we need a function for the log likelihood, in this example we will use the function supplied by the package called `logistic.loglik.alpha`. With that function as a starting point, you can also create your own function for this. We also adjust our parameter list to use the third strategy. -```{r} -params$feat$alpha <- "random" -``` +# Examples -We are now ready to run the algorithm, in this vignette we will only run very few iterations for demonstration purposes, but the only thing that needs to be changed are the number or populations to visit `T`, the number of iterations per population `N` and the number of iterations for the final population `N.final` -```{r} -set.seed(1234) -result <- gmjmcmc(bc[, -1], bc[, 1], fixed = 1, loglik.example, logistic.loglik.alpha, transforms, P=3, N=30, N.final=60, probs, params) -``` -We can then summarize the results using the supplied function and get a plot of the importance of the parameters in the last population of features -```{r, fig.width=6, fig.height=6} -plot(result) -``` +Must be provided based on test_currect folder -- GitLab From a0ebc0ccb999a2f0ca8e8197adfaeefe74715051 Mon Sep 17 00:00:00 2001 From: aliaksah Date: Wed, 9 Apr 2025 17:41:21 +0200 Subject: [PATCH 23/36] x, y notation fixed in all documentation examples --- R/feature.R | 2 +- R/gmjmcmc_support.R | 2 +- R/predict.R | 31 ++++++++++++++----------------- R/summary.R | 2 +- man/marginal.probs.Rd | 2 +- man/predict.gmjmcmc.Rd | 7 +++---- man/predict.gmjmcmc_merged.Rd | 7 +++---- man/predict.gmjmcmc_parallel.Rd | 9 ++++----- man/predict.mjmcmc.Rd | 4 ++-- man/predict.mjmcmc_parallel.Rd | 4 ++-- man/print.feature.Rd | 2 +- man/summary.gmjmcmc.Rd | 2 +- 12 files changed, 34 insertions(+), 40 deletions(-) diff --git a/R/feature.R b/R/feature.R index 28c2439..ee161d2 100644 --- a/R/feature.R +++ b/R/feature.R @@ -105,7 +105,7 @@ update.alphas <- function (feature, alphas, recurse=FALSE) { #' @return String representation of a feature #' #' @examples -#' result <- gmjmcmc(matrix(rnorm(600), 100), P = 2, gaussian.loglik, NULL, c("p0", "exp_dbl")) +#' result <- gmjmcmc(x = matrix(rnorm(600), 100),y = matrix(rnorm(100), 100), P = 2, transforms = c("p0", "exp_dbl")) #' print(result$populations[[1]][1]) #' #' @export diff --git a/R/gmjmcmc_support.R b/R/gmjmcmc_support.R index 5e4dcb1..4af0d4e 100644 --- a/R/gmjmcmc_support.R +++ b/R/gmjmcmc_support.R @@ -44,7 +44,7 @@ verify.inputs <- function (data, loglik.pi, transforms, T, N, N.final, probs, pa #' @return A numeric vector of marginal model probabilities based on relative frequencies of model visits in MCMC. #' #' @examples -#' result <- gmjmcmc(matrix(rnorm(600), 100), P = 2, gaussian.loglik, NULL, c("p0", "exp_dbl")) +#' result <- gmjmcmc(x = matrix(rnorm(600), 100),y = matrix(rnorm(100), 100), P = 2, transforms = c("p0", "exp_dbl")) #' marginal.probs(result$models[[1]]) #' #' @export diff --git a/R/predict.R b/R/predict.R index 396da42..9bad036 100644 --- a/R/predict.R +++ b/R/predict.R @@ -53,11 +53,10 @@ predict.bgnlm_model <- function(object, x, link = function(x) { x }, ... ) { #' #' @examples #' result <- gmjmcmc( -#' matrix(rnorm(600), 100), +#' x = matrix(rnorm(600), 100), +#' y = matrix(rnorm(100), 100), #' P = 2, -#' gaussian.loglik, -#' loglik.alpha = gaussian.loglik.alpha, -#' c("p0", "exp_dbl") +#' transforms = c("p0", "exp_dbl") #' ) #' preds <- predict(result, matrix(rnorm(600), 100)) #' @@ -108,11 +107,10 @@ predict.gmjmcmc.2 <- function (object, x, link = function(x) x, quantiles = c(0. #' runs = 1, #' cores = 1, #' list(populations = "best", complex.measure = 2, tol = 0.0000001), -#' matrix(rnorm(600), 100), +#' x = matrix(rnorm(600), 100), +#' y = matrix(rnorm(100), 100), #' P = 2, -#' gaussian.loglik, -#' loglik.alpha = gaussian.loglik.alpha, -#' c("p0", "exp_dbl") +#' transforms = c("p0", "exp_dbl") #' ) #' preds <- predict(result, matrix(rnorm(600), 100)) #' @@ -174,8 +172,8 @@ predict.gmjmcmc_merged <- function (object, x, link = function(x) x, quantiles = #' \item{quantiles}{Quantiles of aggregated predictions.} #' #' @examples -#' result <- mjmcmc(matrix(rnorm(600), 100), gaussian.loglik) -#' preds <- predict(result, matrix(rnorm(500), 100)) +#' result <- mjmcmc(x = matrix(rnorm(600), 100),y = matrix(rnorm(100), 100), gaussian.loglik) +#' preds <- predict(result, matrix(rnorm(600), 100)) #' #' @export predict.mjmcmc <- function (object, x, link = function(x) x, quantiles = c(0.025, 0.5, 0.975), ...) { @@ -209,8 +207,8 @@ predict.mjmcmc <- function (object, x, link = function(x) x, quantiles = c(0.025 #' \item{quantiles}{Quantiles of aggregated predictions.} #' #' @examples -#' result <- mjmcmc.parallel(runs = 1, cores = 1, matrix(rnorm(600), 100), gaussian.loglik) -#' preds <- predict(result, matrix(rnorm(500), 100)) +#' result <- mjmcmc.parallel(runs = 1, cores = 1, x = matrix(rnorm(600), 100),y = matrix(rnorm(100), 100), gaussian.loglik) +#' preds <- predict(result, matrix(rnorm(600), 100)) #' #' @export predict.mjmcmc_parallel <- function (object, x, link = function(x) x, quantiles = c(0.025, 0.5, 0.975), ...) { @@ -246,13 +244,12 @@ predict.mjmcmc_parallel <- function (object, x, link = function(x) x, quantiles #' runs = 1, #' cores = 1, #' list(populations = "best", complex.measure = 2, tol = 0.0000001), -#' matrix(rnorm(600), 100), +#' x = matrix(rnorm(600), 100), +#' y = matrix(rnorm(100), 100), #' P = 2, -#' gaussian.loglik, -#' loglik.alpha = gaussian.loglik.alpha, -#' c("p0", "exp_dbl") +#' transforms = c("p0", "exp_dbl") #' ) -#' preds <- predict(result$results, matrix(rnorm(600), 100)) +#' preds <- predict(result, matrix(rnorm(600), 100)) #' #' @export predict.gmjmcmc_parallel <- function (object, x, link = function(x) x, quantiles = c(0.025, 0.5, 0.975), ...) { diff --git a/R/summary.R b/R/summary.R index 3bcb876..80c404a 100644 --- a/R/summary.R +++ b/R/summary.R @@ -14,7 +14,7 @@ #' \item{marg.probs}{Marginal probabilities corresponding to the ordered feature strings.} #' #' @examples -#' result <- gmjmcmc(y = matrix(rnorm(100), 100),x = matrix(rnorm(600), 100), P = 2, gaussian.loglik, NULL, c("p0", "exp_dbl")) +#' result <- gmjmcmc(y = matrix(rnorm(100), 100),x = matrix(rnorm(600), 100), P = 2, transforms = c("p0", "exp_dbl")) #' summary(result, pop = "best") #' #' @export diff --git a/man/marginal.probs.Rd b/man/marginal.probs.Rd index 53df969..39299fb 100644 --- a/man/marginal.probs.Rd +++ b/man/marginal.probs.Rd @@ -16,7 +16,7 @@ A numeric vector of marginal model probabilities based on relative frequencies o Function for calculating marginal inclusion probabilities of features given a list of models } \examples{ -result <- gmjmcmc(matrix(rnorm(600), 100), P = 2, gaussian.loglik, NULL, c("p0", "exp_dbl")) +result <- gmjmcmc(x = matrix(rnorm(600), 100),y = matrix(rnorm(100), 100), P = 2, transforms = c("p0", "exp_dbl")) marginal.probs(result$models[[1]]) } diff --git a/man/predict.gmjmcmc.Rd b/man/predict.gmjmcmc.Rd index f154a4b..58c08b1 100644 --- a/man/predict.gmjmcmc.Rd +++ b/man/predict.gmjmcmc.Rd @@ -39,11 +39,10 @@ Predict using a gmjmcmc result object. } \examples{ result <- gmjmcmc( - matrix(rnorm(600), 100), + x = matrix(rnorm(600), 100), + y = matrix(rnorm(100), 100), P = 2, - gaussian.loglik, - loglik.alpha = gaussian.loglik.alpha, - c("p0", "exp_dbl") + transforms = c("p0", "exp_dbl") ) preds <- predict(result, matrix(rnorm(600), 100)) diff --git a/man/predict.gmjmcmc_merged.Rd b/man/predict.gmjmcmc_merged.Rd index 5679e4c..c698aa7 100644 --- a/man/predict.gmjmcmc_merged.Rd +++ b/man/predict.gmjmcmc_merged.Rd @@ -42,11 +42,10 @@ result <- gmjmcmc.parallel( runs = 1, cores = 1, list(populations = "best", complex.measure = 2, tol = 0.0000001), - matrix(rnorm(600), 100), + x = matrix(rnorm(600), 100), + y = matrix(rnorm(100), 100), P = 2, - gaussian.loglik, - loglik.alpha = gaussian.loglik.alpha, - c("p0", "exp_dbl") + transforms = c("p0", "exp_dbl") ) preds <- predict(result, matrix(rnorm(600), 100)) diff --git a/man/predict.gmjmcmc_parallel.Rd b/man/predict.gmjmcmc_parallel.Rd index f2dd556..b78935b 100644 --- a/man/predict.gmjmcmc_parallel.Rd +++ b/man/predict.gmjmcmc_parallel.Rd @@ -30,12 +30,11 @@ result <- gmjmcmc.parallel( runs = 1, cores = 1, list(populations = "best", complex.measure = 2, tol = 0.0000001), - matrix(rnorm(600), 100), + x = matrix(rnorm(600), 100), + y = matrix(rnorm(100), 100), P = 2, - gaussian.loglik, - loglik.alpha = gaussian.loglik.alpha, - c("p0", "exp_dbl") + transforms = c("p0", "exp_dbl") ) -preds <- predict(result$results, matrix(rnorm(600), 100)) +preds <- predict(result, matrix(rnorm(600), 100)) } diff --git a/man/predict.mjmcmc.Rd b/man/predict.mjmcmc.Rd index 8d6c962..f36afb7 100644 --- a/man/predict.mjmcmc.Rd +++ b/man/predict.mjmcmc.Rd @@ -26,7 +26,7 @@ A list containing aggregated predictions. Predict using a mjmcmc result object. } \examples{ -result <- mjmcmc(matrix(rnorm(600), 100), gaussian.loglik) -preds <- predict(result, matrix(rnorm(500), 100)) +result <- mjmcmc(x = matrix(rnorm(600), 100),y = matrix(rnorm(100), 100), gaussian.loglik) +preds <- predict(result, matrix(rnorm(600), 100)) } diff --git a/man/predict.mjmcmc_parallel.Rd b/man/predict.mjmcmc_parallel.Rd index cbacd44..6094258 100644 --- a/man/predict.mjmcmc_parallel.Rd +++ b/man/predict.mjmcmc_parallel.Rd @@ -26,7 +26,7 @@ A list containing aggregated predictions. Predict using a mjmcmc result object from a parallel run. } \examples{ -result <- mjmcmc.parallel(runs = 1, cores = 1, matrix(rnorm(600), 100), gaussian.loglik) -preds <- predict(result, matrix(rnorm(500), 100)) +result <- mjmcmc.parallel(runs = 1, cores = 1, x = matrix(rnorm(600), 100),y = matrix(rnorm(100), 100), gaussian.loglik) +preds <- predict(result, matrix(rnorm(600), 100)) } diff --git a/man/print.feature.Rd b/man/print.feature.Rd index 89a1951..a4cbf2f 100644 --- a/man/print.feature.Rd +++ b/man/print.feature.Rd @@ -34,7 +34,7 @@ String representation of a feature Print method for "feature" class } \examples{ -result <- gmjmcmc(matrix(rnorm(600), 100), P = 2, gaussian.loglik, NULL, c("p0", "exp_dbl")) +result <- gmjmcmc(x = matrix(rnorm(600), 100),y = matrix(rnorm(100), 100), P = 2, transforms = c("p0", "exp_dbl")) print(result$populations[[1]][1]) } diff --git a/man/summary.gmjmcmc.Rd b/man/summary.gmjmcmc.Rd index 1e6d859..af556b5 100644 --- a/man/summary.gmjmcmc.Rd +++ b/man/summary.gmjmcmc.Rd @@ -41,7 +41,7 @@ A data frame containing the following columns: Function to print a quick summary of the results } \examples{ -result <- gmjmcmc(y = matrix(rnorm(100), 100),x = matrix(rnorm(600), 100), P = 2, gaussian.loglik, NULL, c("p0", "exp_dbl")) +result <- gmjmcmc(y = matrix(rnorm(100), 100),x = matrix(rnorm(600), 100), P = 2, transforms = c("p0", "exp_dbl")) summary(result, pop = "best") } -- GitLab From dd5e0b8767f3c297eb3ecfee017310181b229e21 Mon Sep 17 00:00:00 2001 From: aliaksah Date: Wed, 9 Apr 2025 18:22:52 +0200 Subject: [PATCH 24/36] some notes resolved --- R/alpha_generation.R | 4 ++-- R/diagnostics.R | 5 ++++- R/feature.R | 5 ++++- R/gmjmcmc.R | 5 ++++- R/gmjmcmc_support.R | 5 ++++- R/likelihoods.R | 12 +++++++++-- R/parallel.R | 5 ++++- R/predict.R | 6 +++++- R/results.R | 35 ++++++++++++++++++++++++++------- R/summary.R | 9 +++++++-- man/compute_effects.Rd | 6 +++++- man/diagn_plot.Rd | 5 ++++- man/fbms.mlik.master.Rd | 7 ++++++- man/get.best.model.Rd | 4 +++- man/glm.logpost.bas.Rd | 5 ++++- man/gmjmcmc.Rd | 5 ++++- man/marginal.probs.Rd | 5 ++++- man/mjmcmc.parallel.Rd | 5 ++++- man/model.string.Rd | 4 +++- man/plot.gmjmcmc.Rd | 5 ++++- man/plot.mjmcmc_parallel.Rd | 6 +++++- man/predict.mjmcmc_parallel.Rd | 6 +++++- man/print.feature.Rd | 5 ++++- man/string.population.Rd | 5 ++++- man/string.population.models.Rd | 5 ++++- man/summary.gmjmcmc.Rd | 5 ++++- man/summary.mjmcmc.Rd | 4 +++- 27 files changed, 142 insertions(+), 36 deletions(-) diff --git a/R/alpha_generation.R b/R/alpha_generation.R index 19ae72a..24587a1 100644 --- a/R/alpha_generation.R +++ b/R/alpha_generation.R @@ -5,7 +5,7 @@ gen.alphas <- function (strategy, feature, data, loglik, verbose) { if (strategy == "deep") feature <- alpha_3(feature, data, loglik, verbose) - else if (strategy == "random") feature <- alpha_4(feature) else stop("Not implemented.") + else if (strategy == "random") feature <- alpha_4(feature, data) else stop("Not implemented.") return(feature) } @@ -71,7 +71,7 @@ alpha_3 <- function (feature, data, loglik, verbose) { #' #' @noRd #' -alpha_4 <- function (feature) { +alpha_4 <- function (feature, data) { # Create the string representation of the feature with variable alphas featfun <- print.feature(feature, dataset = TRUE, fixed = data$fixed, alphas = TRUE) featfun <- set_alphas(featfun) diff --git a/R/diagnostics.R b/R/diagnostics.R index bf1e239..f34629b 100644 --- a/R/diagnostics.R +++ b/R/diagnostics.R @@ -17,7 +17,10 @@ #' @return A list of summary statistics for checking convergence with given confidence intervals #' #' @examples -#' result <- gmjmcmc( y = matrix(rnorm(100), 100),x = matrix(rnorm(600), 100), P = 2, transforms = c("p0", "exp_dbl")) +#' result <- gmjmcmc( y = matrix(rnorm(100), 100), +#' x = matrix(rnorm(600), 100), +#' P = 2, +#' transforms = c("p0", "exp_dbl")) #' diagnstats <- diagn_plot(result) #' #' @export diff --git a/R/feature.R b/R/feature.R index ee161d2..90aa0ae 100644 --- a/R/feature.R +++ b/R/feature.R @@ -105,7 +105,10 @@ update.alphas <- function (feature, alphas, recurse=FALSE) { #' @return String representation of a feature #' #' @examples -#' result <- gmjmcmc(x = matrix(rnorm(600), 100),y = matrix(rnorm(100), 100), P = 2, transforms = c("p0", "exp_dbl")) +#' result <- gmjmcmc(x = matrix(rnorm(600), 100), +#' y = matrix(rnorm(100), 100), +#' P = 2, +#' transforms = c("p0", "exp_dbl")) #' print(result$populations[[1]][1]) #' #' @export diff --git a/R/gmjmcmc.R b/R/gmjmcmc.R index b66f65e..af2914b 100644 --- a/R/gmjmcmc.R +++ b/R/gmjmcmc.R @@ -40,7 +40,10 @@ NULL #' \item{best}{Best marginal model probability throughout the run, represented as the maximum value in \code{unlist(best.margs)}.} #' #' @examples -#' result <- gmjmcmc(y = matrix(rnorm(100), 100),x = matrix(rnorm(600), 100), P = 2, transform = c("p0", "exp_dbl")) +#' result <- gmjmcmc(y = matrix(rnorm(100), 100), +#' x = matrix(rnorm(600), 100), +#' P = 2, +#' transform = c("p0", "exp_dbl")) #' summary(result) #' plot(result) #' diff --git a/R/gmjmcmc_support.R b/R/gmjmcmc_support.R index 4af0d4e..3d970b8 100644 --- a/R/gmjmcmc_support.R +++ b/R/gmjmcmc_support.R @@ -44,7 +44,10 @@ verify.inputs <- function (data, loglik.pi, transforms, T, N, N.final, probs, pa #' @return A numeric vector of marginal model probabilities based on relative frequencies of model visits in MCMC. #' #' @examples -#' result <- gmjmcmc(x = matrix(rnorm(600), 100),y = matrix(rnorm(100), 100), P = 2, transforms = c("p0", "exp_dbl")) +#' result <- gmjmcmc(x = matrix(rnorm(600), 100), +#' y = matrix(rnorm(100), 100), +#' P = 2, +#' transforms = c("p0", "exp_dbl")) #' marginal.probs(result$models[[1]]) #' #' @export diff --git a/R/likelihoods.R b/R/likelihoods.R index f645dc1..4158485 100644 --- a/R/likelihoods.R +++ b/R/likelihoods.R @@ -16,7 +16,10 @@ #' @return A list with the log marginal likelihood combined with the log prior (crit) and the posterior mode of the coefficients (coefs). #' #' @examples -#' glm.logpost.bas(as.integer(rnorm(100) > 0), cbind(1, matrix(rnorm(100))), c(TRUE, TRUE), list(oc = 1)) +#' glm.logpost.bas(as.integer(rnorm(100) > 0), +#' cbind(1, matrix(rnorm(100))), +#' c(TRUE, TRUE), +#' list(oc = 1)) #' #' @importFrom BAS uniform Jeffreys g.prior #' @importFrom stats poisson Gamma glm.control @@ -597,7 +600,12 @@ log_prior <- function (params, complex) { #' \item{coefs}{Posterior mode of the coefficients.} #' #' @examples -#' fbms.mlik.master(y = rnorm(100), x = matrix(rnorm(100)), c(TRUE,TRUE), list(oc = 1),params = list(family = "gaussian", beta_prior = list(type = "g-prior", a = 2), r = exp(-0.5))) +#' fbms.mlik.master(y = rnorm(100), +#' x = matrix(rnorm(100)), +#' c(TRUE,TRUE), +#' list(oc = 1), +#' params = list(family = "gaussian", beta_prior = list(type = "g-prior", a = 2), +#' r = exp(-0.5))) #' #' @importFrom BAS beta.prime bic.prior CCH EB.local g.prior hyper.g hyper.g.n tCCH intrinsic TG Jeffreys uniform #' @export diff --git a/R/parallel.R b/R/parallel.R index b7a4401..f655262 100644 --- a/R/parallel.R +++ b/R/parallel.R @@ -88,7 +88,10 @@ rmclapply <- function(runs, args, fun, mc.cores = NULL) { #' @return Merged results from multiple mjmcmc runs #' #' @examples -#' result <- mjmcmc.parallel(runs = 1, cores = 1, y = matrix(rnorm(100), 100),x = matrix(rnorm(600), 100)) +#' result <- mjmcmc.parallel(runs = 1, +#' cores = 1, +#' y = matrix(rnorm(100), 100), +#' x = matrix(rnorm(600), 100)) #' summary(result) #' plot(result) #' diff --git a/R/predict.R b/R/predict.R index 9bad036..b120c45 100644 --- a/R/predict.R +++ b/R/predict.R @@ -207,7 +207,11 @@ predict.mjmcmc <- function (object, x, link = function(x) x, quantiles = c(0.025 #' \item{quantiles}{Quantiles of aggregated predictions.} #' #' @examples -#' result <- mjmcmc.parallel(runs = 1, cores = 1, x = matrix(rnorm(600), 100),y = matrix(rnorm(100), 100), gaussian.loglik) +#' result <- mjmcmc.parallel(runs = 1, +#' cores = 1, +#' x = matrix(rnorm(600), 100), +#' y = matrix(rnorm(100), 100), +#' gaussian.loglik) #' preds <- predict(result, matrix(rnorm(600), 100)) #' #' @export diff --git a/R/results.R b/R/results.R index 322ef7d..022bd66 100644 --- a/R/results.R +++ b/R/results.R @@ -223,7 +223,9 @@ population.weigths <- function (results, pops.use) { #' @return A character representation of a model #' #' @examples -#' result <- gmjmcmc(y = matrix(rnorm(100), 100),x = matrix(rnorm(600), 100), P = 2, transforms = c("p0", "exp_dbl")) +#' result <- gmjmcmc(y = matrix(rnorm(100), 100), +#' x = matrix(rnorm(600), 100), +#' P = 2, transforms = c("p0", "exp_dbl")) #' summary(result) #' plot(result) #' model.string(c(TRUE, FALSE, TRUE, FALSE, TRUE), result$populations[[1]]) @@ -354,7 +356,9 @@ get.mpm.model <- function(result, y, x, labels = F, family = "gaussian", loglik. #' } #' #' @examples -#' result <- gmjmcmc(x = matrix(rnorm(600), 100),y = matrix(rnorm(100), 100), P = 2, transforms = c("p0", "exp_dbl")) +#' result <- gmjmcmc(x = matrix(rnorm(600), 100), +#' y = matrix(rnorm(100), 100), +#' P = 2, transforms = c("p0", "exp_dbl")) #' get.best.model(result) #' #' @export @@ -419,7 +423,10 @@ get.best.model.mjmcmc <- function (result, labels) { #' @return A matrix of character representations of the features of a model. #' #' @examples -#' result <- gmjmcmc(y = matrix(rnorm(100), 100),x = matrix(rnorm(600), 100), P = 2, transforms = c("p0", "exp_dbl")) +#' result <- gmjmcmc(y = matrix(rnorm(100), 100), +#' x = matrix(rnorm(600), 100), +#' P = 2, +#' transforms = c("p0", "exp_dbl")) #' string.population(result$populations[[1]]) #' #' @export @@ -437,7 +444,10 @@ string.population <- function(x, round = 2) { #' @return A matrix of character representations of a list of models. #' #' @examples -#' result <- gmjmcmc(y = matrix(rnorm(100), 100),x = matrix(rnorm(600), 100), P = 2, transforms = c("p0", "exp_dbl")) +#' result <- gmjmcmc(y = matrix(rnorm(100), 100), +#' x = matrix(rnorm(600), 100), +#' P = 2, +#' transforms = c("p0", "exp_dbl")) #' string.population.models(result$populations[[2]], result$models[[2]]) #' #' @export @@ -458,7 +468,10 @@ string.population.models <- function(features, models, round = 2, link = "I") { #' @return No return value, just creates a plot #' #' @examples -#' result <- gmjmcmc(y = matrix(rnorm(100), 100),x = matrix(rnorm(600), 100), P = 2, transforms = c("p0", "exp_dbl")) +#' result <- gmjmcmc(y = matrix(rnorm(100), 100), +#' x = matrix(rnorm(600), 100), +#' P = 2, +#' transforms = c("p0", "exp_dbl")) #' plot(result) #' #' @@ -533,7 +546,11 @@ marg.prob.plot <- function (feats.strings, marg.probs, count = "all", ...) { #' @return No return value, just creates a plot #' #' @examples -#' result <- mjmcmc.parallel(runs = 1, cores = 1, y = matrix(rnorm(100), 100),x = matrix(rnorm(600), 100), gaussian.loglik) +#' result <- mjmcmc.parallel(runs = 1, +#' cores = 1, +#' y = matrix(rnorm(100), 100), +#' x = matrix(rnorm(600), 100), +#' gaussian.loglik) #' plot(result) #' #' @export @@ -608,7 +625,11 @@ plot.gmjmcmc_merged <- function (x, count = "all", pop = NULL,tol = 0.0000001, #' @examples #' #' data <- data.frame(matrix(rnorm(600), 100)) -#' result <- mjmcmc.parallel(runs = 2, cores = 1, y = matrix(rnorm(100), 100),x = data, gaussian.loglik) +#' result <- mjmcmc.parallel(runs = 2, +#' cores = 1, +#' y = matrix(rnorm(100), 100), +#' x = data, +#' gaussian.loglik) #' compute_effects(result,labels = names(data)) #' #' @seealso \code{\link{predict}} diff --git a/R/summary.R b/R/summary.R index 80c404a..f3ac549 100644 --- a/R/summary.R +++ b/R/summary.R @@ -14,7 +14,10 @@ #' \item{marg.probs}{Marginal probabilities corresponding to the ordered feature strings.} #' #' @examples -#' result <- gmjmcmc(y = matrix(rnorm(100), 100),x = matrix(rnorm(600), 100), P = 2, transforms = c("p0", "exp_dbl")) +#' result <- gmjmcmc(y = matrix(rnorm(100), 100), +#' x = matrix(rnorm(600), 100), +#' P = 2, +#' transforms = c("p0", "exp_dbl")) #' summary(result, pop = "best") #' #' @export @@ -149,7 +152,9 @@ summary.gmjmcmc_merged <- function (object, tol = 0.0001, labels = FALSE, effect #' \item{marg.probs}{Marginal probabilities corresponding to the ordered feature strings.} #' #' @examples -#' result <- mjmcmc( y = matrix(rnorm(100), 100),x = matrix(rnorm(600), 100), gaussian.loglik) +#' result <- mjmcmc(y = matrix(rnorm(100), 100), +#' x = matrix(rnorm(600), 100), +#' gaussian.loglik) #' summary(result) #' #' @export diff --git a/man/compute_effects.Rd b/man/compute_effects.Rd index cf540bb..e6c2e96 100644 --- a/man/compute_effects.Rd +++ b/man/compute_effects.Rd @@ -24,7 +24,11 @@ Users can provide custom labels and specify quantiles for the computation of eff \examples{ data <- data.frame(matrix(rnorm(600), 100)) -result <- mjmcmc.parallel(runs = 2, cores = 1, y = matrix(rnorm(100), 100),x = data, gaussian.loglik) +result <- mjmcmc.parallel(runs = 2, +cores = 1, +y = matrix(rnorm(100), 100), +x = data, +gaussian.loglik) compute_effects(result,labels = names(data)) } diff --git a/man/diagn_plot.Rd b/man/diagn_plot.Rd index 2298dcd..82fb235 100644 --- a/man/diagn_plot.Rd +++ b/man/diagn_plot.Rd @@ -26,7 +26,10 @@ A list of summary statistics for checking convergence with given confidence inte Plot convergence of best/median/mean/other summary log posteriors in time } \examples{ -result <- gmjmcmc( y = matrix(rnorm(100), 100),x = matrix(rnorm(600), 100), P = 2, transforms = c("p0", "exp_dbl")) +result <- gmjmcmc( y = matrix(rnorm(100), 100), +x = matrix(rnorm(600), 100), +P = 2, +transforms = c("p0", "exp_dbl")) diagnstats <- diagn_plot(result) } diff --git a/man/fbms.mlik.master.Rd b/man/fbms.mlik.master.Rd index b6424f9..a6556ea 100644 --- a/man/fbms.mlik.master.Rd +++ b/man/fbms.mlik.master.Rd @@ -68,6 +68,11 @@ This function serves as a unified interface to compute the log marginal likeliho for different regression models and priors by calling specific log likelihood functions. } \examples{ -fbms.mlik.master(y = rnorm(100), x = matrix(rnorm(100)), c(TRUE,TRUE), list(oc = 1),params = list(family = "gaussian", beta_prior = list(type = "g-prior", a = 2), r = exp(-0.5))) +fbms.mlik.master(y = rnorm(100), +x = matrix(rnorm(100)), +c(TRUE,TRUE), +list(oc = 1), +params = list(family = "gaussian", beta_prior = list(type = "g-prior", a = 2), + r = exp(-0.5))) } diff --git a/man/get.best.model.Rd b/man/get.best.model.Rd index 356a775..9b564e5 100644 --- a/man/get.best.model.Rd +++ b/man/get.best.model.Rd @@ -37,7 +37,9 @@ The function identifies the best model by selecting the one with the highest \co } } \examples{ -result <- gmjmcmc(x = matrix(rnorm(600), 100),y = matrix(rnorm(100), 100), P = 2, transforms = c("p0", "exp_dbl")) +result <- gmjmcmc(x = matrix(rnorm(600), 100), +y = matrix(rnorm(100), 100), +P = 2, transforms = c("p0", "exp_dbl")) get.best.model(result) } diff --git a/man/glm.logpost.bas.Rd b/man/glm.logpost.bas.Rd index 11ae24c..87fdcc8 100644 --- a/man/glm.logpost.bas.Rd +++ b/man/glm.logpost.bas.Rd @@ -34,6 +34,9 @@ This function is created as an example of how to create an estimator that is use to calculate the marginal likelihood of a model. } \examples{ -glm.logpost.bas(as.integer(rnorm(100) > 0), cbind(1, matrix(rnorm(100))), c(TRUE, TRUE), list(oc = 1)) +glm.logpost.bas(as.integer(rnorm(100) > 0), +cbind(1, matrix(rnorm(100))), +c(TRUE, TRUE), +list(oc = 1)) } diff --git a/man/gmjmcmc.Rd b/man/gmjmcmc.Rd index 06202d3..e3087a3 100644 --- a/man/gmjmcmc.Rd +++ b/man/gmjmcmc.Rd @@ -67,7 +67,10 @@ A list containing the following elements: Main algorithm for GMJMCMC (Genetically Modified MJMCMC) } \examples{ -result <- gmjmcmc(y = matrix(rnorm(100), 100),x = matrix(rnorm(600), 100), P = 2, transform = c("p0", "exp_dbl")) +result <- gmjmcmc(y = matrix(rnorm(100), 100), +x = matrix(rnorm(600), 100), +P = 2, +transform = c("p0", "exp_dbl")) summary(result) plot(result) diff --git a/man/marginal.probs.Rd b/man/marginal.probs.Rd index 39299fb..cf927ec 100644 --- a/man/marginal.probs.Rd +++ b/man/marginal.probs.Rd @@ -16,7 +16,10 @@ A numeric vector of marginal model probabilities based on relative frequencies o Function for calculating marginal inclusion probabilities of features given a list of models } \examples{ -result <- gmjmcmc(x = matrix(rnorm(600), 100),y = matrix(rnorm(100), 100), P = 2, transforms = c("p0", "exp_dbl")) +result <- gmjmcmc(x = matrix(rnorm(600), 100), +y = matrix(rnorm(100), 100), +P = 2, +transforms = c("p0", "exp_dbl")) marginal.probs(result$models[[1]]) } diff --git a/man/mjmcmc.parallel.Rd b/man/mjmcmc.parallel.Rd index f4275a0..6b1ad02 100644 --- a/man/mjmcmc.parallel.Rd +++ b/man/mjmcmc.parallel.Rd @@ -20,7 +20,10 @@ Merged results from multiple mjmcmc runs Run multiple mjmcmc runs in parallel, merging the results before returning. } \examples{ -result <- mjmcmc.parallel(runs = 1, cores = 1, y = matrix(rnorm(100), 100),x = matrix(rnorm(600), 100)) +result <- mjmcmc.parallel(runs = 1, +cores = 1, +y = matrix(rnorm(100), 100), +x = matrix(rnorm(600), 100)) summary(result) plot(result) diff --git a/man/model.string.Rd b/man/model.string.Rd index 0f4a158..d2769d1 100644 --- a/man/model.string.Rd +++ b/man/model.string.Rd @@ -22,7 +22,9 @@ A character representation of a model Function to generate a function string for a model consisting of features } \examples{ -result <- gmjmcmc(y = matrix(rnorm(100), 100),x = matrix(rnorm(600), 100), P = 2, transforms = c("p0", "exp_dbl")) +result <- gmjmcmc(y = matrix(rnorm(100), 100), +x = matrix(rnorm(600), 100), +P = 2, transforms = c("p0", "exp_dbl")) summary(result) plot(result) model.string(c(TRUE, FALSE, TRUE, FALSE, TRUE), result$populations[[1]]) diff --git a/man/plot.gmjmcmc.Rd b/man/plot.gmjmcmc.Rd index f019a0c..8e0e88c 100644 --- a/man/plot.gmjmcmc.Rd +++ b/man/plot.gmjmcmc.Rd @@ -28,7 +28,10 @@ Function to plot the results, works both for results from gmjmcmc and merged results from merge.results } \examples{ -result <- gmjmcmc(y = matrix(rnorm(100), 100),x = matrix(rnorm(600), 100), P = 2, transforms = c("p0", "exp_dbl")) +result <- gmjmcmc(y = matrix(rnorm(100), 100), +x = matrix(rnorm(600), 100), +P = 2, +transforms = c("p0", "exp_dbl")) plot(result) diff --git a/man/plot.mjmcmc_parallel.Rd b/man/plot.mjmcmc_parallel.Rd index 81c1c5e..f957c59 100644 --- a/man/plot.mjmcmc_parallel.Rd +++ b/man/plot.mjmcmc_parallel.Rd @@ -20,7 +20,11 @@ No return value, just creates a plot Plot a mjmcmc_parallel run } \examples{ -result <- mjmcmc.parallel(runs = 1, cores = 1, y = matrix(rnorm(100), 100),x = matrix(rnorm(600), 100), gaussian.loglik) +result <- mjmcmc.parallel(runs = 1, +cores = 1, +y = matrix(rnorm(100), 100), +x = matrix(rnorm(600), 100), +gaussian.loglik) plot(result) } diff --git a/man/predict.mjmcmc_parallel.Rd b/man/predict.mjmcmc_parallel.Rd index 6094258..ba5a9f8 100644 --- a/man/predict.mjmcmc_parallel.Rd +++ b/man/predict.mjmcmc_parallel.Rd @@ -26,7 +26,11 @@ A list containing aggregated predictions. Predict using a mjmcmc result object from a parallel run. } \examples{ -result <- mjmcmc.parallel(runs = 1, cores = 1, x = matrix(rnorm(600), 100),y = matrix(rnorm(100), 100), gaussian.loglik) +result <- mjmcmc.parallel(runs = 1, +cores = 1, +x = matrix(rnorm(600), 100), +y = matrix(rnorm(100), 100), +gaussian.loglik) preds <- predict(result, matrix(rnorm(600), 100)) } diff --git a/man/print.feature.Rd b/man/print.feature.Rd index a4cbf2f..8c2b992 100644 --- a/man/print.feature.Rd +++ b/man/print.feature.Rd @@ -34,7 +34,10 @@ String representation of a feature Print method for "feature" class } \examples{ -result <- gmjmcmc(x = matrix(rnorm(600), 100),y = matrix(rnorm(100), 100), P = 2, transforms = c("p0", "exp_dbl")) +result <- gmjmcmc(x = matrix(rnorm(600), 100), +y = matrix(rnorm(100), 100), +P = 2, +transforms = c("p0", "exp_dbl")) print(result$populations[[1]][1]) } diff --git a/man/string.population.Rd b/man/string.population.Rd index 9ff8868..fb6c21b 100644 --- a/man/string.population.Rd +++ b/man/string.population.Rd @@ -18,7 +18,10 @@ A matrix of character representations of the features of a model. Function to get a character representation of a list of features } \examples{ -result <- gmjmcmc(y = matrix(rnorm(100), 100),x = matrix(rnorm(600), 100), P = 2, transforms = c("p0", "exp_dbl")) +result <- gmjmcmc(y = matrix(rnorm(100), 100), +x = matrix(rnorm(600), 100), +P = 2, +transforms = c("p0", "exp_dbl")) string.population(result$populations[[1]]) } diff --git a/man/string.population.models.Rd b/man/string.population.models.Rd index 492b886..f287ac7 100644 --- a/man/string.population.models.Rd +++ b/man/string.population.models.Rd @@ -22,7 +22,10 @@ A matrix of character representations of a list of models. Function to get a character representation of a list of models } \examples{ -result <- gmjmcmc(y = matrix(rnorm(100), 100),x = matrix(rnorm(600), 100), P = 2, transforms = c("p0", "exp_dbl")) +result <- gmjmcmc(y = matrix(rnorm(100), 100), +x = matrix(rnorm(600), 100), +P = 2, +transforms = c("p0", "exp_dbl")) string.population.models(result$populations[[2]], result$models[[2]]) } diff --git a/man/summary.gmjmcmc.Rd b/man/summary.gmjmcmc.Rd index af556b5..bc1d172 100644 --- a/man/summary.gmjmcmc.Rd +++ b/man/summary.gmjmcmc.Rd @@ -41,7 +41,10 @@ A data frame containing the following columns: Function to print a quick summary of the results } \examples{ -result <- gmjmcmc(y = matrix(rnorm(100), 100),x = matrix(rnorm(600), 100), P = 2, transforms = c("p0", "exp_dbl")) +result <- gmjmcmc(y = matrix(rnorm(100), 100), +x = matrix(rnorm(600), 100), +P = 2, +transforms = c("p0", "exp_dbl")) summary(result, pop = "best") } diff --git a/man/summary.mjmcmc.Rd b/man/summary.mjmcmc.Rd index 70dcf96..8c11cee 100644 --- a/man/summary.mjmcmc.Rd +++ b/man/summary.mjmcmc.Rd @@ -35,7 +35,9 @@ A data frame containing the following columns: Function to print a quick summary of the results } \examples{ -result <- mjmcmc( y = matrix(rnorm(100), 100),x = matrix(rnorm(600), 100), gaussian.loglik) +result <- mjmcmc(y = matrix(rnorm(100), 100), +x = matrix(rnorm(600), 100), +gaussian.loglik) summary(result) } -- GitLab From 7f11a5e03da7f5e2bba85c0d16d162e0c56ed83b Mon Sep 17 00:00:00 2001 From: aliaksah Date: Wed, 9 Apr 2025 18:28:50 +0200 Subject: [PATCH 25/36] more notes resolved --- NAMESPACE | 1 + R/fbms.R | 8 -------- R/likelihoods.R | 2 +- R/likelihoods2.R | 4 ++-- R/summary.R | 6 +++++- man/summary.mjmcmc_parallel.Rd | 6 +++++- 6 files changed, 14 insertions(+), 13 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index fa98675..153607b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -100,6 +100,7 @@ importFrom(BAS,hypergeometric1F1) importFrom(BAS,hypergeometric2F1) importFrom(BAS,intrinsic) importFrom(BAS,phi1) +importFrom(BAS,robust) importFrom(BAS,tCCH) importFrom(BAS,uniform) importFrom(GenSA,GenSA) diff --git a/R/fbms.R b/R/fbms.R index 98a53f2..ba5056b 100644 --- a/R/fbms.R +++ b/R/fbms.R @@ -179,14 +179,6 @@ fbms <- function ( return(res) } -fbms.mlpost.master <- function (y, x, model, complex, params = list(family = "gaussian", beta_prior = list(type = "g-prior"), r = exp(-0.5))) { - params_use <- params - params_use$beta_prior <- gen.mlpost.params(params$beta_prior$type, params$beta_prior, ncol(x), nrow(x)) - params_use$beta_prior$type <- params$beta_prior$type - loglik.pi <- select.mlpost.fun(params$beta_prior$type, params$family) - return(loglik.pi(y, x, model, complex, params_use)) -} - gen.mlpost.params.glm <- function (beta_prior, user_params, p, n) { if(beta_prior == "Jeffreys-BIC") diff --git a/R/likelihoods.R b/R/likelihoods.R index 4158485..01fdb8e 100644 --- a/R/likelihoods.R +++ b/R/likelihoods.R @@ -607,7 +607,7 @@ log_prior <- function (params, complex) { #' params = list(family = "gaussian", beta_prior = list(type = "g-prior", a = 2), #' r = exp(-0.5))) #' -#' @importFrom BAS beta.prime bic.prior CCH EB.local g.prior hyper.g hyper.g.n tCCH intrinsic TG Jeffreys uniform +#' @importFrom BAS robust beta.prime bic.prior CCH EB.local g.prior hyper.g hyper.g.n tCCH intrinsic TG Jeffreys uniform #' @export fbms.mlik.master <- function(y, x, model, complex, params = list(family = "gaussian", beta_prior = list(type = "g-prior"), r = exp(-0.5))) { # Extract dimensions diff --git a/R/likelihoods2.R b/R/likelihoods2.R index 6a9f8cb..0b4d7b9 100644 --- a/R/likelihoods2.R +++ b/R/likelihoods2.R @@ -611,7 +611,7 @@ log_prior <- function (params, complex) { #' @examples #' fbms.mlik.master(rnorm(100), matrix(rnorm(100)), c(TRUE,TRUE), list(oc = 1)) #' -#' @importFrom BAS beta.prime bic.prior CCH EB.local g.prior hyper.g hyper.g.n tCCH intrinsic TG Jeffreys uniform +#' @importFrom BAS robust beta.prime bic.prior CCH EB.local g.prior hyper.g hyper.g.n tCCH intrinsic TG Jeffreys uniform #' @export fbms.mlik.master_old <- function(y, x, model, complex, params = list(family = "gaussian", prior_beta = "g-prior", r = exp(-0.5))) { # Extract dimensions @@ -658,7 +658,7 @@ fbms.mlik.master_old <- function(y, x, model, complex, params = list(family = "g if (params_master$prior_beta %in% gaussian_only_priors) { stop(sprintf("Prior '%s' is not supported for GLM family '%s'. Supported GLM priors: %s", params_master$prior_beta, params_master$family, - paste(c(glm_only_priors, glm_and_gaussian_priors), collapse = ", "))) + paste(c(glm_and_gaussian_priors), collapse = ", "))) } params_nested$family <- params_master$family diff --git a/R/summary.R b/R/summary.R index f3ac549..ecd49fd 100644 --- a/R/summary.R +++ b/R/summary.R @@ -182,7 +182,11 @@ summary.mjmcmc <- function (object, tol = 0.0001, labels = FALSE, effects = NULL #' \item{marg.probs}{Marginal probabilities corresponding to the ordered feature strings.} #' #' @examples -#' result <- mjmcmc.parallel(runs = 1, cores = 1, y = matrix(rnorm(100), 100),x = matrix(rnorm(600), 100), gaussian.loglik) +#' result <- mjmcmc.parallel(runs = 1, +#' cores = 1, +#' y = matrix(rnorm(100), 100), +#' x = matrix(rnorm(600), 100), +#' gaussian.loglik) #' summary(result) #' #' @export diff --git a/man/summary.mjmcmc_parallel.Rd b/man/summary.mjmcmc_parallel.Rd index 23250d6..2c77b2a 100644 --- a/man/summary.mjmcmc_parallel.Rd +++ b/man/summary.mjmcmc_parallel.Rd @@ -35,7 +35,11 @@ A data frame containing the following columns: Function to print a quick summary of the results } \examples{ -result <- mjmcmc.parallel(runs = 1, cores = 1, y = matrix(rnorm(100), 100),x = matrix(rnorm(600), 100), gaussian.loglik) +result <- mjmcmc.parallel(runs = 1, +cores = 1, +y = matrix(rnorm(100), 100), +x = matrix(rnorm(600), 100), +gaussian.loglik) summary(result) } -- GitLab From e07500dc23edc686033b8597a36e057bb154b35b Mon Sep 17 00:00:00 2001 From: aliaksah Date: Wed, 9 Apr 2025 18:43:38 +0200 Subject: [PATCH 26/36] all warnings fixed, I strongly do not like print.feature to be dependent on fixed. If the function is exported the user should never have any idea about fixed if he runs fbms. --- R/feature.R | 1 + R/gmjmcmc.R | 10 ++++++---- R/mjmcmc.R | 8 +++++--- man/gmjmcmc.Rd | 16 +++++++++++----- man/gmjmcmc.parallel.Rd | 6 ++++++ man/mjmcmc.Rd | 14 ++++++++++---- man/print.feature.Rd | 2 ++ 7 files changed, 41 insertions(+), 16 deletions(-) diff --git a/R/feature.R b/R/feature.R index 90aa0ae..05c3507 100644 --- a/R/feature.R +++ b/R/feature.R @@ -97,6 +97,7 @@ update.alphas <- function (feature, alphas, recurse=FALSE) { #' #' @param x An object of class "feature" #' @param dataset Set the regular covariates as columns in a dataset +#' @param fixed How many of the first columns in dataset are fixed and do not contribute to variable selection #' @param alphas Print a "?" instead of actual alphas to prepare the output for alpha estimation #' @param labels Should the covariates be named, or just referred to as their place in the data.frame. #' @param round Should numbers be rounded when printing? Default is FALSE, otherwise it can be set to the number of decimal places. diff --git a/R/gmjmcmc.R b/R/gmjmcmc.R index af2914b..ab5f8a2 100644 --- a/R/gmjmcmc.R +++ b/R/gmjmcmc.R @@ -10,9 +10,11 @@ NULL #' Main algorithm for GMJMCMC (Genetically Modified MJMCMC) #' -#' @param data A matrix containing the data to use in the algorithm, -#' first column should be the dependent variable, -#' and the rest of the columns should be the independent variables. +#' @param x matrix containing the design matrix with data to use in the algorithm, +#' @param y response variable +#' @param mlpost_params parameters for the estimator function loglik.pi +#' @param intercept whether intercept should be added to the design matrix (no model selection for intercept) +#' @param fixed how many of the first columns of the design matrix will always be included in the models #' @param loglik.pi The (log) density to explore #' @param loglik.alpha The likelihood function to use for alpha calculation #' @param transforms A Character vector including the names of the non-linear functions to be used by the modification @@ -55,7 +57,7 @@ gmjmcmc <- function ( loglik.alpha = gaussian.loglik.alpha, mlpost_params = NULL, transforms, - intercept = FALSE, + intercept = TRUE, fixed = 0, P = 10, N.init = 100, diff --git a/R/mjmcmc.R b/R/mjmcmc.R index 786b5aa..f010568 100644 --- a/R/mjmcmc.R +++ b/R/mjmcmc.R @@ -5,9 +5,11 @@ #' Main algorithm for MJMCMC (Genetically Modified MJMCMC) #' -#' @param data A matrix containing the data to use in the algorithm, -#' first column should be the dependent variable, -#' and the rest of the columns should be the independent variables. +#' @param x matrix containing the design matrix with data to use in the algorithm, +#' @param y response variable +#' @param mlpost_params parameters for the estimator function loglik.pi +#' @param intercept whether intercept should be added to the design matrix (no model selection for intercept) +#' @param fixed how many of the first columns of the design matrix will always be included in the models #' @param loglik.pi The (log) density to explore #' @param N The number of iterations to run for #' @param probs A list of the various probability vectors to use diff --git a/man/gmjmcmc.Rd b/man/gmjmcmc.Rd index e3087a3..a91a5a3 100644 --- a/man/gmjmcmc.Rd +++ b/man/gmjmcmc.Rd @@ -11,7 +11,7 @@ gmjmcmc( loglik.alpha = gaussian.loglik.alpha, mlpost_params = NULL, transforms, - intercept = FALSE, + intercept = TRUE, fixed = 0, P = 10, N.init = 100, @@ -23,13 +23,23 @@ gmjmcmc( ) } \arguments{ +\item{x}{matrix containing the design matrix with data to use in the algorithm,} + +\item{y}{response variable} + \item{loglik.pi}{The (log) density to explore} \item{loglik.alpha}{The likelihood function to use for alpha calculation} +\item{mlpost_params}{parameters for the estimator function loglik.pi} + \item{transforms}{A Character vector including the names of the non-linear functions to be used by the modification and the projection operator.} +\item{intercept}{whether intercept should be added to the design matrix (no model selection for intercept)} + +\item{fixed}{how many of the first columns of the design matrix will always be included in the models} + \item{P}{The number of generations for GMJMCMC (Genetically Modified MJMCMC). The default value is $P = 10$. A larger value like $P = 50$ might be more realistic for more complicated examples where one expects a lot of non-linear structures.} @@ -45,10 +55,6 @@ A larger value like $P = 50$ might be more realistic for more complicated exampl \item{sub}{An indicator that if the likelihood is inexact and should be improved each model visit (EXPERIMENTAL!)} \item{verbose}{A logical denoting if messages should be printed} - -\item{data}{A matrix containing the data to use in the algorithm, -first column should be the dependent variable, -and the rest of the columns should be the independent variables.} } \value{ A list containing the following elements: diff --git a/man/gmjmcmc.parallel.Rd b/man/gmjmcmc.parallel.Rd index 2897678..8ef9c18 100644 --- a/man/gmjmcmc.parallel.Rd +++ b/man/gmjmcmc.parallel.Rd @@ -24,10 +24,16 @@ gmjmcmc.parallel( \item{merge.options}{A list of options to pass to the \code{\link[=merge_results]{merge_results()}} function run after the} +\item{x}{matrix containing the design matrix with data to use in the algorithm,} + +\item{y}{response variable} + \item{loglik.pi}{The (log) density to explore} \item{loglik.alpha}{The likelihood function to use for alpha calculation} +\item{mlpost_params}{parameters for the estimator function loglik.pi} + \item{transforms}{A Character vector including the names of the non-linear functions to be used by the modification and the projection operator.} diff --git a/man/mjmcmc.Rd b/man/mjmcmc.Rd index ac4b908..b35b62e 100644 --- a/man/mjmcmc.Rd +++ b/man/mjmcmc.Rd @@ -19,8 +19,18 @@ mjmcmc( ) } \arguments{ +\item{x}{matrix containing the design matrix with data to use in the algorithm,} + +\item{y}{response variable} + \item{loglik.pi}{The (log) density to explore} +\item{mlpost_params}{parameters for the estimator function loglik.pi} + +\item{intercept}{whether intercept should be added to the design matrix (no model selection for intercept)} + +\item{fixed}{how many of the first columns of the design matrix will always be included in the models} + \item{N}{The number of iterations to run for} \item{probs}{A list of the various probability vectors to use} @@ -30,10 +40,6 @@ mjmcmc( \item{sub}{An indicator that if the likelihood is inexact and should be improved each model visit (EXPERIMENTAL!)} \item{verbose}{A logical denoting if messages should be printed} - -\item{data}{A matrix containing the data to use in the algorithm, -first column should be the dependent variable, -and the rest of the columns should be the independent variables.} } \value{ A list containing the following elements: diff --git a/man/print.feature.Rd b/man/print.feature.Rd index 8c2b992..4ad803c 100644 --- a/man/print.feature.Rd +++ b/man/print.feature.Rd @@ -19,6 +19,8 @@ \item{dataset}{Set the regular covariates as columns in a dataset} +\item{fixed}{How many of the first columns in dataset are fixed and do not contribute to variable selection} + \item{alphas}{Print a "?" instead of actual alphas to prepare the output for alpha estimation} \item{labels}{Should the covariates be named, or just referred to as their place in the data.frame.} -- GitLab From d1839d1afd779f5614053bf720a4b270847d3e5d Mon Sep 17 00:00:00 2001 From: aliaksah Date: Wed, 9 Apr 2025 18:48:34 +0200 Subject: [PATCH 27/36] import fbms terms fixed --- NAMESPACE | 1 + R/fbms.R | 1 + 2 files changed, 2 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index 153607b..3571fc9 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -139,6 +139,7 @@ importFrom(stats,rbinom) importFrom(stats,rnorm) importFrom(stats,runif) importFrom(stats,sd) +importFrom(stats,terms) importFrom(stats,var) importFrom(tolerance,F1) importFrom(utils,sessionInfo) diff --git a/R/fbms.R b/R/fbms.R index ba5056b..213da8f 100644 --- a/R/fbms.R +++ b/R/fbms.R @@ -62,6 +62,7 @@ #' #' @seealso \code{\link{mjmcmc}}, \code{\link{gmjmcmc}}, \code{\link{gmjmcmc.parallel}} #' @export +#' @importFrom stats terms fbms <- function ( formula = NULL, family = "gaussian", -- GitLab From 0dbcccf226030605f3c038db7a675c945422188d Mon Sep 17 00:00:00 2001 From: aliaksah Date: Fri, 11 Apr 2025 15:13:03 +0200 Subject: [PATCH 28/36] #something is wrong with the names! summary(result.lin) --- R/fbms.R | 2 +- R/gmjmcmc.R | 4 ++-- R/likelihoods.R | 5 +++++ 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/R/fbms.R b/R/fbms.R index 213da8f..239ba71 100644 --- a/R/fbms.R +++ b/R/fbms.R @@ -66,7 +66,7 @@ fbms <- function ( formula = NULL, family = "gaussian", - beta_prior = list(type = "g-prior"), + beta_prior = list(type = "robust"), model_prior = NULL, data = NULL, impute = FALSE, diff --git a/R/gmjmcmc.R b/R/gmjmcmc.R index ab5f8a2..b0682dd 100644 --- a/R/gmjmcmc.R +++ b/R/gmjmcmc.R @@ -53,9 +53,9 @@ NULL gmjmcmc <- function ( x, y, - loglik.pi = gaussian.loglik, + loglik.pi = fbms.mlik.master, loglik.alpha = gaussian.loglik.alpha, - mlpost_params = NULL, + mlpost_params = list(family = "gaussian",beta_prior = list(type = "robust")), transforms, intercept = TRUE, fixed = 0, diff --git a/R/likelihoods.R b/R/likelihoods.R index 01fdb8e..7d2afcd 100644 --- a/R/likelihoods.R +++ b/R/likelihoods.R @@ -615,6 +615,9 @@ fbms.mlik.master <- function(y, x, model, complex, params = list(family = "gauss p <- sum(model) - 1 # Number of predictors excluding intercept params_use <- list() + if(length(params$r) == 0) + params$r = 1/length(y) + if(params$family == "gaussian") params_use$beta_prior <- gen.mlpost.params.lm(params$beta_prior$type, params$beta_prior, p+1, n) else @@ -623,6 +626,8 @@ fbms.mlik.master <- function(y, x, model, complex, params = list(family = "gauss params_use$family <- params$family } + + loglik.pi <- select.mlpost.fun(params$beta_prior$type, params$family) result <- loglik.pi(y,x,model,complex,params_use) -- GitLab From 471e964178f5c8b2470527b9185235aeb1052a1c Mon Sep 17 00:00:00 2001 From: aliaksah Date: Fri, 11 Apr 2025 19:31:40 +0200 Subject: [PATCH 29/36] more defauls updated --- R/mjmcmc.R | 4 ++-- tests_current/Ex2_Sec4_1.R | 14 ++++++++------ 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/R/mjmcmc.R b/R/mjmcmc.R index f010568..2c84566 100644 --- a/R/mjmcmc.R +++ b/R/mjmcmc.R @@ -36,8 +36,8 @@ mjmcmc <- function ( x, y, - loglik.pi = gaussian.loglik, - mlpost_params = NULL, + loglik.pi = fbms.mlik.master, + mlpost_params = list(family = "gaussian",beta_prior = list(type = "robust")), intercept = FALSE, fixed = 0, N = 100, diff --git a/tests_current/Ex2_Sec4_1.R b/tests_current/Ex2_Sec4_1.R index de32a65..75d4e41 100644 --- a/tests_current/Ex2_Sec4_1.R +++ b/tests_current/Ex2_Sec4_1.R @@ -43,7 +43,7 @@ X<-scale(X)/sqrt(n) df <- as.data.frame(cbind(y, X)) -correct.model +correct.model + 1 beta.k @@ -55,7 +55,7 @@ if (use.fbms) { result <- result <- fbms(data = df, method = "gmjmcmc", transforms = transforms, P = 40) } else { - result <- gmjmcmc(df, gaussian.loglik, gaussian.loglik.alpha, transforms, P = 40) + result <- gmjmcmc(x = df[-1],y=df[,1], mlpost_params = list(family = "gaussian", beta_prior = list(type = "robust")), transforms = transforms, P = 40) } summary(result) @@ -65,7 +65,7 @@ if (use.fbms) { result2 <- result <- fbms(data = df, method = "gmjmcmc", transforms = transforms, N.init = 1000, N.final = 5000, P = 40) } else { - result2 <- gmjmcmc(df, transforms = transforms, + result2 <- gmjmcmc(y = df[,1],x = df[-1], transforms = transforms, mlpost_params = list(family = "gaussian", beta_prior = list(type = "robust")), N.init = 1000, N.final = 5000, P = 40) } summary(result2) @@ -93,16 +93,18 @@ params.lin$loglik$r <- 1/dim(df)[1] if (use.fbms) { result.lin <- fbms(data = df, N = 5000) } else { - result.lin <- mjmcmc(df, N = 5000, probs = probs.lin, + result.lin <- mjmcmc(y = df[,1],x = df[,-1], N = 5000, probs = probs.lin, params = params.lin) } plot(result.lin) + +#something is wrong with the names, Jon! summary(result.lin) -correct.model +correct.model beta.k @@ -112,7 +114,7 @@ set.seed(123) if (use.fbms) { result.lindef <- fbms(data = df) } else { - result.lindef <- mjmcmc(df) + result.lindef <- mjmcmc(x = df[,-1],y = df[,1]) } -- GitLab From a4939ccf65c33711969bb66f0b2d2ba917aed2df Mon Sep 17 00:00:00 2001 From: aliaksah Date: Fri, 11 Apr 2025 19:47:26 +0200 Subject: [PATCH 30/36] fix examples --- R/parallel.R | 2 ++ man/fbms.Rd | 2 +- man/gmjmcmc.Rd | 4 ++-- man/gmjmcmc.parallel.Rd | 1 + man/mjmcmc.Rd | 4 ++-- man/mjmcmc.parallel.Rd | 1 + 6 files changed, 9 insertions(+), 5 deletions(-) diff --git a/R/parallel.R b/R/parallel.R index f655262..d0eb1c1 100644 --- a/R/parallel.R +++ b/R/parallel.R @@ -90,6 +90,7 @@ rmclapply <- function(runs, args, fun, mc.cores = NULL) { #' @examples #' result <- mjmcmc.parallel(runs = 1, #' cores = 1, +#' loglik.pi = FBMS::gaussian.loglik, #' y = matrix(rnorm(100), 100), #' x = matrix(rnorm(600), 100)) #' summary(result) @@ -119,6 +120,7 @@ mjmcmc.parallel <- function(runs = 2, cores = getOption("mc.cores", 2L), ...) { #' result <- gmjmcmc.parallel( #' runs = 1, #' cores = 1, +#' loglik.pi = FBMS::gaussian.loglik, #' y = matrix(rnorm(100), 100), #' x = matrix(rnorm(600), 100), #' transforms = c("p0", "exp_dbl") diff --git a/man/fbms.Rd b/man/fbms.Rd index 210b1df..ea3b282 100644 --- a/man/fbms.Rd +++ b/man/fbms.Rd @@ -8,7 +8,7 @@ Or Fit a BGLM model using Modified Mode Jumping Markov Chain Monte Carlo (MCMC) fbms( formula = NULL, family = "gaussian", - beta_prior = list(type = "g-prior"), + beta_prior = list(type = "robust"), model_prior = NULL, data = NULL, impute = FALSE, diff --git a/man/gmjmcmc.Rd b/man/gmjmcmc.Rd index a91a5a3..45d9c30 100644 --- a/man/gmjmcmc.Rd +++ b/man/gmjmcmc.Rd @@ -7,9 +7,9 @@ gmjmcmc( x, y, - loglik.pi = gaussian.loglik, + loglik.pi = fbms.mlik.master, loglik.alpha = gaussian.loglik.alpha, - mlpost_params = NULL, + mlpost_params = list(family = "gaussian", beta_prior = list(type = "robust")), transforms, intercept = TRUE, fixed = 0, diff --git a/man/gmjmcmc.parallel.Rd b/man/gmjmcmc.parallel.Rd index 8ef9c18..6efa65c 100644 --- a/man/gmjmcmc.parallel.Rd +++ b/man/gmjmcmc.parallel.Rd @@ -49,6 +49,7 @@ Run multiple gmjmcmc (Genetically Modified MJMCMC) runs in parallel returning a result <- gmjmcmc.parallel( runs = 1, cores = 1, + loglik.pi = FBMS::gaussian.loglik, y = matrix(rnorm(100), 100), x = matrix(rnorm(600), 100), transforms = c("p0", "exp_dbl") diff --git a/man/mjmcmc.Rd b/man/mjmcmc.Rd index b35b62e..0b16005 100644 --- a/man/mjmcmc.Rd +++ b/man/mjmcmc.Rd @@ -7,8 +7,8 @@ mjmcmc( x, y, - loglik.pi = gaussian.loglik, - mlpost_params = NULL, + loglik.pi = fbms.mlik.master, + mlpost_params = list(family = "gaussian", beta_prior = list(type = "robust")), intercept = FALSE, fixed = 0, N = 100, diff --git a/man/mjmcmc.parallel.Rd b/man/mjmcmc.parallel.Rd index 6b1ad02..f25fab9 100644 --- a/man/mjmcmc.parallel.Rd +++ b/man/mjmcmc.parallel.Rd @@ -22,6 +22,7 @@ Run multiple mjmcmc runs in parallel, merging the results before returning. \examples{ result <- mjmcmc.parallel(runs = 1, cores = 1, +loglik.pi = FBMS::gaussian.loglik, y = matrix(rnorm(100), 100), x = matrix(rnorm(600), 100)) summary(result) -- GitLab From a82df2d249374ec474e541e2b3c27338ea5f2396 Mon Sep 17 00:00:00 2001 From: aliaksah Date: Fri, 11 Apr 2025 19:49:26 +0200 Subject: [PATCH 31/36] g default --- R/mjmcmc.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/mjmcmc.R b/R/mjmcmc.R index 2c84566..6bdc01c 100644 --- a/R/mjmcmc.R +++ b/R/mjmcmc.R @@ -37,7 +37,7 @@ mjmcmc <- function ( x, y, loglik.pi = fbms.mlik.master, - mlpost_params = list(family = "gaussian",beta_prior = list(type = "robust")), + mlpost_params = list(family = "gaussian",beta_prior = list(type = "g")), intercept = FALSE, fixed = 0, N = 100, -- GitLab From c381b9ed26c91282582ce45d529296daa42baadb Mon Sep 17 00:00:00 2001 From: aliaksah Date: Fri, 11 Apr 2025 19:49:57 +0200 Subject: [PATCH 32/36] g default --- R/fbms.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/fbms.R b/R/fbms.R index 239ba71..d752bdc 100644 --- a/R/fbms.R +++ b/R/fbms.R @@ -66,7 +66,7 @@ fbms <- function ( formula = NULL, family = "gaussian", - beta_prior = list(type = "robust"), + beta_prior = list(type = "g"), model_prior = NULL, data = NULL, impute = FALSE, -- GitLab From 5f26727c07d901fa30927ca63da5acfd5eb570a6 Mon Sep 17 00:00:00 2001 From: jonlachmann Date: Fri, 11 Apr 2025 20:18:27 +0200 Subject: [PATCH 33/36] * Update tests to respect new defaults etc. --- R/arguments.R | 2 +- R/fbms.R | 2 +- R/gmjmcmc.R | 2 +- R/mjmcmc.R | 2 +- tests/testthat/test_fbms.R | 24 ++++++++++++------------ tests/testthat/test_mjmcmc.R | 21 +++++++++++---------- 6 files changed, 27 insertions(+), 26 deletions(-) diff --git a/R/arguments.R b/R/arguments.R index 93642b5..7785019 100644 --- a/R/arguments.R +++ b/R/arguments.R @@ -322,7 +322,7 @@ gen.params.gmjmcmc <- function (data) { params <- gen.params.mjmcmc(data) ## Get the dimensions of the data to set parameters based on it - if(is.list(data) && length(data$fixed)>0 && length(data$x) > 0) + if (is.list(data) && length(data$fixed)>0 && length(data$x) > 0) ncov <- ncol(data$x) - data$fixed #This is not ok as for fbms no one knows anything about fixed and $x! else # I fixed that, but this is somewhat not elegant still { diff --git a/R/fbms.R b/R/fbms.R index d752bdc..213da8f 100644 --- a/R/fbms.R +++ b/R/fbms.R @@ -66,7 +66,7 @@ fbms <- function ( formula = NULL, family = "gaussian", - beta_prior = list(type = "g"), + beta_prior = list(type = "g-prior"), model_prior = NULL, data = NULL, impute = FALSE, diff --git a/R/gmjmcmc.R b/R/gmjmcmc.R index b0682dd..1943212 100644 --- a/R/gmjmcmc.R +++ b/R/gmjmcmc.R @@ -55,7 +55,7 @@ gmjmcmc <- function ( y, loglik.pi = fbms.mlik.master, loglik.alpha = gaussian.loglik.alpha, - mlpost_params = list(family = "gaussian",beta_prior = list(type = "robust")), + mlpost_params = list(family = "gaussian", beta_prior = list(type = "robust")), transforms, intercept = TRUE, fixed = 0, diff --git a/R/mjmcmc.R b/R/mjmcmc.R index 6bdc01c..58dfc3f 100644 --- a/R/mjmcmc.R +++ b/R/mjmcmc.R @@ -37,7 +37,7 @@ mjmcmc <- function ( x, y, loglik.pi = fbms.mlik.master, - mlpost_params = list(family = "gaussian",beta_prior = list(type = "g")), + mlpost_params = list(family = "gaussian", beta_prior = list(type = "g-prior")), intercept = FALSE, fixed = 0, N = 100, diff --git a/tests/testthat/test_fbms.R b/tests/testthat/test_fbms.R index 6affc4b..e7ce460 100644 --- a/tests/testthat/test_fbms.R +++ b/tests/testthat/test_fbms.R @@ -54,27 +54,27 @@ test_that("Test (G)MJMCMC through the fbms function", { # No intercept data <- as.data.frame(cbind(y, x)) colnames(data) <- c("y", "a", "b", "c") - mod1 <- fbms(y ~ . - 1, family = "gaussian", method = "mjmcmc", data = data, verbose = FALSE) - mod1p <- fbms(y ~ . - 1, family = "gaussian", method = "mjmcmc.parallel", data = data, verbose = FALSE) + mod1 <- fbms(y ~ . - 1, family = "gaussian", beta_prior = list(type = "Jeffreys-BIC"), method = "mjmcmc", data = data, verbose = FALSE) + mod1p <- fbms(y ~ . - 1, family = "gaussian", beta_prior = list(type = "Jeffreys-BIC"), method = "mjmcmc.parallel", data = data, verbose = FALSE) validate.model(mod1, x, y) validate.model(mod1p, x, y) data$y <- y_sin - gmod1 <- fbms(y ~ . - 1, family = "gaussian", method = "gmjmcmc", data = data, transforms = "sin", params = params, probs = probs, P = 20, verbose = FALSE) - gmod1p <- fbms(y ~ . - 1, family = "gaussian", method = "gmjmcmc.parallel", data = data, transforms = "sin", params = params, probs = probs, P = 20, verbose = FALSE) + gmod1 <- fbms(y ~ . - 1, family = "gaussian", beta_prior = list(type = "Jeffreys-BIC"), method = "gmjmcmc", data = data, transforms = "sin", params = params, probs = probs, P = 20, verbose = FALSE) + gmod1p <- fbms(y ~ . - 1, family = "gaussian", beta_prior = list(type = "Jeffreys-BIC"), method = "gmjmcmc.parallel", data = data, transforms = "sin", params = params, probs = probs, P = 20, verbose = FALSE) validate.gmodel(gmod1, x, y_sin) validate.gmodel(gmod1p, x, y_sin) # Model defined intercept data$y <- y_shift - mod2 <- fbms(y ~ ., family = "gaussian", method = "mjmcmc", data = data, verbose = FALSE) - mod2p <- fbms(y ~ ., family = "gaussian", method = "mjmcmc.parallel", data = data, verbose = FALSE) + mod2 <- fbms(y ~ ., family = "gaussian", beta_prior = list(type = "Jeffreys-BIC"), method = "mjmcmc", data = data, verbose = FALSE) + mod2p <- fbms(y ~ ., family = "gaussian", beta_prior = list(type = "Jeffreys-BIC"), method = "mjmcmc.parallel", data = data, verbose = FALSE) validate.model(mod2, x, y_shift) validate.model(mod2p, x, y_shift) data$y <- y_sin_shift - gmod2 <- fbms(y ~ ., family = "gaussian", method = "gmjmcmc", data = data, transforms = "sin", params = params, probs = probs, P = 20, verbose = FALSE) - gmod2p <- fbms(y ~ ., family = "gaussian", method = "gmjmcmc.parallel", data = data, transforms = "sin", params = params, probs = probs, P = 20, verbose = FALSE) + gmod2 <- fbms(y ~ ., family = "gaussian", beta_prior = list(type = "Jeffreys-BIC"), method = "gmjmcmc", data = data, transforms = "sin", params = params, probs = probs, P = 20, verbose = FALSE) + gmod2p <- fbms(y ~ ., family = "gaussian", beta_prior = list(type = "Jeffreys-BIC"), method = "gmjmcmc.parallel", data = data, transforms = "sin", params = params, probs = probs, P = 20, verbose = FALSE) validate.gmodel(gmod2, x, y_sin_shift) validate.gmodel(gmod2p, x, y_sin_shift) @@ -82,14 +82,14 @@ test_that("Test (G)MJMCMC through the fbms function", { data <- cbind(data[, 1], 1, data[, -1]) colnames(data) <- c("y", "const", "a", "b", "c") data$y <- y_shift - mod3 <- fbms(y ~ . - 1, family = "gaussian", method = "mjmcmc", data = data, fixed = 1, verbose = FALSE) - mod3p <- fbms(y ~ . - 1, family = "gaussian", method = "mjmcmc.parallel", data = data, fixed = 1, verbose = FALSE) + mod3 <- fbms(y ~ . - 1, family = "gaussian", beta_prior = list(type = "Jeffreys-BIC"), method = "mjmcmc", data = data, fixed = 1, verbose = FALSE) + mod3p <- fbms(y ~ . - 1, family = "gaussian", beta_prior = list(type = "Jeffreys-BIC"), method = "mjmcmc.parallel", data = data, fixed = 1, verbose = FALSE) validate.model(mod3, cbind(1, x), y_shift) validate.model(mod3p, cbind(1, x), y_shift) data$y <- y_sin_shift - gmod3 <- fbms(y ~ . - 1, family = "gaussian", method = "gmjmcmc", data = data, transforms = "sin", params = params, probs = probs, P = 20, fixed = 1, verbose = FALSE) - gmod3p <- fbms(y ~ . - 1, family = "gaussian", method = "gmjmcmc.parallel", data = data, transforms = "sin", params = params, probs = probs, P = 20, fixed = 1, verbose = FALSE) + gmod3 <- fbms(y ~ . - 1, family = "gaussian", beta_prior = list(type = "Jeffreys-BIC"), method = "gmjmcmc", data = data, transforms = "sin", params = params, probs = probs, P = 20, fixed = 1, verbose = FALSE) + gmod3p <- fbms(y ~ . - 1, family = "gaussian", beta_prior = list(type = "Jeffreys-BIC"), method = "gmjmcmc.parallel", data = data, transforms = "sin", params = params, probs = probs, P = 20, fixed = 1, verbose = FALSE) validate.gmodel(gmod3, cbind(1, x), y_sin_shift) validate.gmodel(gmod3p, cbind(1, x), y_sin_shift) }) diff --git a/tests/testthat/test_mjmcmc.R b/tests/testthat/test_mjmcmc.R index 14a1bb2..e6b243f 100644 --- a/tests/testthat/test_mjmcmc.R +++ b/tests/testthat/test_mjmcmc.R @@ -50,33 +50,34 @@ test_that("Test (G)MJMCMC", { probs$gen <- c(0, 1, 0, 0) params$feat$D <- 1 params$feat$L <- 2 + mlpost_params = list(family = "gaussian", beta_prior = list(type = "Jeffreys-BIC")) # No intercept - mod1 <- mjmcmc(x, y, gaussian.loglik) - mod1p <- mjmcmc.parallel(2, 2, x, y, gaussian.loglik) + mod1 <- mjmcmc(x, y, gaussian.loglik, mlpost_params, intercept = FALSE) + mod1p <- mjmcmc.parallel(2, 2, x, y, gaussian.loglik, mlpost_params, intercept = FALSE) validate.model(mod1, x, y) validate.model(mod1p, x, y) - gmod1 <- gmjmcmc(x, y_sin, gaussian.loglik, transforms = "sin", params = params, probs = probs, P = 20, verbose = FALSE) - gmod1p <- gmjmcmc.parallel(2, 2, x = x, y = y_sin, loglik.pi = gaussian.loglik, transforms = "sin", params = params, probs = probs, verbose = FALSE) + gmod1 <- gmjmcmc(x, y_sin, gaussian.loglik, mlpost_params = mlpost_params, transforms = "sin", params = params, probs = probs, P = 20, intercept = FALSE, verbose = FALSE) + gmod1p <- gmjmcmc.parallel(2, 2, x = x, y = y_sin, loglik.pi = gaussian.loglik, mlpost_params = mlpost_params, transforms = "sin", params = params, probs = probs, intercept = FALSE, verbose = FALSE) validate.gmodel(gmod1, x, y_sin) validate.gmodel(gmod1p, x, y_sin) # Model defined intercept - mod2 <- mjmcmc(x, y_shift, gaussian.loglik, intercept = TRUE) + mod2 <- mjmcmc(x, y_shift, gaussian.loglik, mlpost_params, intercept = TRUE) validate.model(mod2, x, y_shift) - gmod2 <- gmjmcmc(x, y_sin_shift, gaussian.loglik, transforms = "sin", params = params, probs = probs, intercept = TRUE, P = 20, verbose = FALSE) - gmod2p <- gmjmcmc.parallel(2, 2, x = x, y = y_sin_shift, loglik.pi = gaussian.loglik, transforms = "sin", params = params, probs = probs, intercept = TRUE, verbose = FALSE) + gmod2 <- gmjmcmc(x, y_sin_shift, gaussian.loglik, mlpost_params = mlpost_params, transforms = "sin", params = params, probs = probs, intercept = TRUE, P = 20, verbose = FALSE) + gmod2p <- gmjmcmc.parallel(2, 2, x = x, y = y_sin_shift, loglik.pi = gaussian.loglik, mlpost_params = mlpost_params, transforms = "sin", params = params, probs = probs, intercept = TRUE, verbose = FALSE) validate.gmodel(gmod2, x, y_sin_shift) validate.gmodel(gmod2p, x, y_sin_shift) # User defined intercept - mod3 <- mjmcmc(cbind(1, x), y_shift, gaussian.loglik, fixed = 1) + mod3 <- mjmcmc(cbind(1, x), y_shift, gaussian.loglik, mlpost_params, fixed = 1) validate.model(mod3, cbind(1, x), y_shift) - gmod3 <- gmjmcmc(cbind(1, x), y_sin_shift, gaussian.loglik, transforms = "sin", params = params, probs = probs, fixed = 1, P = 20, verbose = FALSE) - gmod3p <- gmjmcmc.parallel(2, 2, x = cbind(1, x), y = y_sin_shift, loglik.pi = gaussian.loglik, transforms = "sin", params = params, probs = probs, fixed = 1, verbose = FALSE) + gmod3 <- gmjmcmc(cbind(1, x), y_sin_shift, gaussian.loglik, mlpost_params = mlpost_params, transforms = "sin", params = params, probs = probs, fixed = 1, intercept = FALSE, P = 20, verbose = FALSE) + gmod3p <- gmjmcmc.parallel(2, 2, x = cbind(1, x), y = y_sin_shift, loglik.pi = gaussian.loglik, mlpost_params = mlpost_params, transforms = "sin", params = params, probs = probs, fixed = 1, intercept = FALSE, verbose = FALSE) validate.gmodel(gmod3, cbind(1, x), y_sin_shift) validate.gmodel(gmod3p, cbind(1, x), y_sin_shift) }) -- GitLab From 67de1554f558826f9864a81bfb8c5a6de62eae5b Mon Sep 17 00:00:00 2001 From: jonlachmann Date: Fri, 11 Apr 2025 20:55:18 +0200 Subject: [PATCH 34/36] * Make gen.params take ncov * Fix labels handling in some of the summaries --- R/arguments.R | 33 ++++++++------------------------- R/fbms.R | 12 ++++++++---- R/gmjmcmc.R | 2 +- R/mjmcmc.R | 8 +++++--- R/results.R | 2 ++ R/summary.R | 2 +- tests/testthat/test_fbms.R | 2 +- tests/testthat/test_mjmcmc.R | 2 +- tests_current/Ex2_Sec4_1.R | 14 ++++++-------- 9 files changed, 33 insertions(+), 44 deletions(-) diff --git a/R/arguments.R b/R/arguments.R index 7785019..6afed00 100644 --- a/R/arguments.R +++ b/R/arguments.R @@ -187,17 +187,9 @@ gen.probs.gmjmcmc <- function (transforms) { #' #' #' @export gen.params.mjmcmc -gen.params.mjmcmc <- function (data) { +gen.params.mjmcmc <- function (ncov) { ### Create a list of parameters for the algorithm - ## Get the dimensions of the data to set parameters based on it - if(is.list(data) && length(data$fixed)>0 && length(data$x) > 0) - ncov <- ncol(data$x) - data$fixed #This is not ok as for fbms no one knows anything about fixed and $x! - else # I fixed that, but this is somewhat not elegant still - { - data.dim <- dim(data) - ncov <- data.dim[2] - 2 - } ## Local optimization parameters sa_kern <- list(probs=c(0.1, 0.05, 0.2, 0.3, 0.2, 0.15), neigh.size=1, neigh.min=1, neigh.max=2) # Simulated annealing proposal kernel parameters @@ -317,24 +309,15 @@ gen.params.mjmcmc <- function (data) { #' @seealso \code{\link{gen.params.mjmcmc}}, \code{\link{gmjmcmc}} #' #' @export gen.params.gmjmcmc -gen.params.gmjmcmc <- function (data) { +gen.params.gmjmcmc <- function (ncov) { # Get mjmcmc params - params <- gen.params.mjmcmc(data) - - ## Get the dimensions of the data to set parameters based on it - if (is.list(data) && length(data$fixed)>0 && length(data$x) > 0) - ncov <- ncol(data$x) - data$fixed #This is not ok as for fbms no one knows anything about fixed and $x! - else # I fixed that, but this is somewhat not elegant still - { - data.dim <- dim(data) - ncov <- data.dim[2] - 2 - } + params <- gen.params.mjmcmc(ncov) feat_params <- list(D = 5, L = 15, # Hard limits on feature complexity alpha = "unit", # alpha strategy ("unit" = None, "deep" strategy 3 from Hubin et al., "random" fully Bayesian strategy) - pop.max = min(100,as.integer(ncov * 1.5)), # Max features population size + pop.max = min(100, as.integer(ncov * 1.5)), # Max features population size keep.org = FALSE, # Always keep original covariates in every population - prel.filter = 0, # Filtration threshold for first population (i.e. filter covariates even if keep.org=TRUE) + prel.filter = 0, # Filtration threshold for first population (i.e. filter covariates even if keep.org=TRUE) keep.min = 0.8, # Minimum proportion of features to always keep [0,1] eps = 0.05, # Inclusion probability limit for feature generation check.col = TRUE, # Whether the colinearity should be checked @@ -344,9 +327,9 @@ gen.params.gmjmcmc <- function (data) { # Large jump parameters large_params <- list( - neigh.size = min(as.integer(params$feat$pop.max * 0.35),as.integer(ncov * 0.35),35), - neigh.min = min(as.integer(params$feat$pop.max * 0.35),as.integer(ncov * 0.25),25), - neigh.max = min(as.integer(params$feat$pop.max * 0.35),as.integer(ncov * 0.45),45) + neigh.size = min(as.integer(params$feat$pop.max * 0.35), as.integer(ncov * 0.35), 35), + neigh.min = min(as.integer(params$feat$pop.max * 0.35), as.integer(ncov * 0.25), 25), + neigh.max = min(as.integer(params$feat$pop.max * 0.35), as.integer(ncov * 0.45), 45) ) params$large <- large_params diff --git a/R/fbms.R b/R/fbms.R index 213da8f..ced37f1 100644 --- a/R/fbms.R +++ b/R/fbms.R @@ -289,11 +289,15 @@ gen.mlpost.params.lm <- function (beta_prior, user_params, p, n) { } else if (beta_prior == "uniform") { return(list(type = "uniform")) } else{ - if(!is.null(user_params$a)) - { + if (!is.null(user_params$a)) { alpha <- user_params$a - } else - alpha <- -1#check how BAS uses the default + } else { + if (beta_prior == "g-prior") { + alpha <- min(p^2, n) + } else { + alpha <- -1 #check how BAS uses the default + } + } if (beta_prior == "g-prior") { return(list(method = 0, alpha = alpha)) } else if (beta_prior == "hyper-g") { diff --git a/R/gmjmcmc.R b/R/gmjmcmc.R index 1943212..d07b593 100644 --- a/R/gmjmcmc.R +++ b/R/gmjmcmc.R @@ -76,7 +76,7 @@ gmjmcmc <- function ( # Generate default probabilities and parameters if there are none supplied. if (is.null(probs)) probs <- gen.probs.gmjmcmc(transforms) - if (is.null(params)) params <- gen.params.gmjmcmc(data) + if (is.null(params)) params <- gen.params.gmjmcmc(ncol(data$x) - data$fixed) if (!is.null(mlpost_params)) params$mlpost <- mlpost_params # Extract labels from column names in dataframe diff --git a/R/mjmcmc.R b/R/mjmcmc.R index 58dfc3f..8dc38ac 100644 --- a/R/mjmcmc.R +++ b/R/mjmcmc.R @@ -38,7 +38,7 @@ mjmcmc <- function ( y, loglik.pi = fbms.mlik.master, mlpost_params = list(family = "gaussian", beta_prior = list(type = "g-prior")), - intercept = FALSE, + intercept = TRUE, fixed = 0, N = 100, probs = NULL, @@ -51,12 +51,14 @@ mjmcmc <- function ( x <- cbind(1, x) fixed <- fixed + 1 } - labels <- names(x) data <- check.data(x, y, fixed, verbose) + labels <- names(x) + if (fixed != 0) + labels <- labels[-seq_len(fixed)] # Generate default probabilities and parameters if there are none supplied. if (is.null(probs)) probs <- gen.probs.mjmcmc() - if (is.null(params)) params <- gen.params.mjmcmc(data) + if (is.null(params)) params <- gen.params.mjmcmc(ncol(data$x) - data$fixed) if (!is.null(mlpost_params)) params$mlpost <- mlpost_params # Acceptance probability diff --git a/R/results.R b/R/results.R index 022bd66..4fa4445 100644 --- a/R/results.R +++ b/R/results.R @@ -395,6 +395,8 @@ get.best.model.gmjmcmc <- function (result, labels) { best.pop.id <- which.max(sapply(result$best.margs,function(x)x)) best.mod.id <- which.max(sapply(result$models[[best.pop.id]],function(x)x$crit)) ret <- result$models[[best.pop.id]][[best.mod.id]] + ret$intercept <- result$intercept + ret$fixed <- result$fixed coefnames <- sapply(result$populations[[best.pop.id]], print.feature, labels = labels)[ret$model] if (result$intercept) coefnames <- c("Intercept", coefnames) names(ret$coefs) <- coefnames diff --git a/R/summary.R b/R/summary.R index ecd49fd..2d4c6e5 100644 --- a/R/summary.R +++ b/R/summary.R @@ -192,7 +192,7 @@ summary.mjmcmc <- function (object, tol = 0.0001, labels = FALSE, effects = NULL #' @export summary.mjmcmc_parallel <- function (object, tol = 0.0001, labels = FALSE, effects = NULL, verbose = TRUE, ...) { # Get features as strings for printing - if (length(labels) == 1 && labels[1] == FALSE && length(object[[1]]$labels) > 0) { + if (length(labels) == 1 && labels[1] == FALSE && length(object$chains[[1]]$labels) > 0) { labels = object$chains[[1]]$labels } feats.strings <- sapply(object$chains[[1]]$populations, FUN = function(x) print.feature(x = x, labels = labels, round = 2)) diff --git a/tests/testthat/test_fbms.R b/tests/testthat/test_fbms.R index e7ce460..51f9e0c 100644 --- a/tests/testthat/test_fbms.R +++ b/tests/testthat/test_fbms.R @@ -45,7 +45,7 @@ test_that("Test (G)MJMCMC through the fbms function", { mpm_model <- get.mpm.model(model, y, x) } - params <- gen.params.gmjmcmc(list(x = x, y = y_sin)) + params <- gen.params.gmjmcmc(ncol(x)) probs <- gen.probs.gmjmcmc("sin") probs$gen <- c(0, 1, 0, 0) params$feat$D <- 1 diff --git a/tests/testthat/test_mjmcmc.R b/tests/testthat/test_mjmcmc.R index e6b243f..a317546 100644 --- a/tests/testthat/test_mjmcmc.R +++ b/tests/testthat/test_mjmcmc.R @@ -45,7 +45,7 @@ test_that("Test (G)MJMCMC", { mpm_model <- get.mpm.model(model, y, x) } - params <- gen.params.gmjmcmc(list(x = x, y = y_sin)) + params <- gen.params.gmjmcmc(ncol(x)) probs <- gen.probs.gmjmcmc("sin") probs$gen <- c(0, 1, 0, 0) params$feat$D <- 1 diff --git a/tests_current/Ex2_Sec4_1.R b/tests_current/Ex2_Sec4_1.R index 75d4e41..af8c456 100644 --- a/tests_current/Ex2_Sec4_1.R +++ b/tests_current/Ex2_Sec4_1.R @@ -10,7 +10,7 @@ # Logical to decide whether to perform analysis with fbms function # If FALSE then gmjmcmc or gmjmcmc.parallel function is used -use.fbms <- FALSE +use.fbms <- FALSE stronger.singal <- FALSE library(mvtnorm) @@ -42,8 +42,9 @@ X<-scale(X)/sqrt(n) df <- as.data.frame(cbind(y, X)) +colnames(df) <- c("Y", paste0("X", seq_len(ncol(df) - 1))) -correct.model + 1 +correct.model beta.k @@ -52,10 +53,9 @@ transforms <- c("sigmoid","sin_deg","exp_dbl","p0","troot","to3") set.seed(123) if (use.fbms) { - result <- result <- fbms(data = df, method = "gmjmcmc", - transforms = transforms, P = 40) + result <- fbms(data = df, method = "gmjmcmc", transforms = transforms, P = 40) } else { - result <- gmjmcmc(x = df[-1],y=df[,1], mlpost_params = list(family = "gaussian", beta_prior = list(type = "robust")), transforms = transforms, P = 40) + result <- gmjmcmc(x = df[-1], y = df[,1], mlpost_params = list(family = "gaussian", beta_prior = list(type = "robust")), transforms = transforms, P = 40) } summary(result) @@ -93,9 +93,7 @@ params.lin$loglik$r <- 1/dim(df)[1] if (use.fbms) { result.lin <- fbms(data = df, N = 5000) } else { - result.lin <- mjmcmc(y = df[,1],x = df[,-1], N = 5000, probs = probs.lin, - params = params.lin) - + result.lin <- mjmcmc(y = df[,1], x = df[,-1], mlpost_params = list(family = "gaussian", beta_prior = list(type = "Jeffreys-BIC")), N = 5000, probs = probs.lin, params = params.lin) } -- GitLab From e9bb0cd21a23b1796d8a43d71ba39c2e73f9666a Mon Sep 17 00:00:00 2001 From: jonlachmann Date: Fri, 11 Apr 2025 21:10:22 +0200 Subject: [PATCH 35/36] * Port all the examples to use x/y instead of data. --- tests_current/Ex10_Sec6_2.R | 18 ++++++++--------- tests_current/Ex11_Sec6_3.R | 6 +++--- tests_current/Ex12_Sec6_4.R | 14 ++++++------- tests_current/Ex13_Sec6_5.R | 10 +++++----- tests_current/Ex1_Sec3.R | 10 +++++----- tests_current/Ex2_Sec4_1.R | 2 +- tests_current/Ex3_Sec4_1.R | 8 ++++---- .../Ex3_Sec4_1_Removed, replaced_by_Ex7.R | 14 ++++++------- tests_current/Ex4_Sec4_2.R | 12 +++++------ tests_current/Ex5_Sec4_3.R | 10 +++++----- tests_current/Ex6_Sec4_4.R | 12 +++++------ tests_current/Ex8_Sec5.2_logic_regression.R | 20 +++++++++---------- tests_current/Ex8_Sec5_2.R | 12 +++++------ tests_current/Ex9_Sec6_1.R | 6 +++--- 14 files changed, 77 insertions(+), 77 deletions(-) diff --git a/tests_current/Ex10_Sec6_2.R b/tests_current/Ex10_Sec6_2.R index 70d0b62..428a895 100644 --- a/tests_current/Ex10_Sec6_2.R +++ b/tests_current/Ex10_Sec6_2.R @@ -40,7 +40,7 @@ transforms <- c("p0","p2","p3","p05","pm05","pm1","pm2","p0p0","p0p05","p0p1","p probs <- gen.probs.gmjmcmc(transforms) probs$gen <- c(1,1,0,1) # Modifications and interactions! -params <- gen.params.gmjmcmc(df) +params <- gen.params.gmjmcmc(ncol(df) - 1) params$feat$D <- 1 # Set depth of features to 1 (still allows for interactions) params$mlpost$r = 1/dim(df)[1] params$feat$pop.max = 10 @@ -203,7 +203,7 @@ if (use.fbms) { transforms = transforms, N.init = 30, probs = probs, params = params, P=3) } else { - result1a <- gmjmcmc(data = df, loglik.pi = mixed.model.loglik.lme4, + result1a <- gmjmcmc(x = df[, -1], y = df[, 1], loglik.pi = mixed.model.loglik.lme4, transforms = transforms, N.init = 30, probs = probs, params = params, P = 3) } @@ -219,7 +219,7 @@ if (use.fbms) { transforms = transforms, N.init = 30, probs = probs, params = params, P=3) } else { - result1b <- gmjmcmc(data = df, loglik.pi = mixed.model.loglik.inla, + result1b <- gmjmcmc(x = df[, -1], y = df[, 1], , loglik.pi = mixed.model.loglik.inla, transforms = transforms, N.init = 30, probs = probs, params = params, P = 3) } @@ -231,7 +231,7 @@ if (use.fbms) { transforms = transforms, N.init = 30, probs = probs, params = params, P=3) } else { - result1c <- gmjmcmc(data = df, loglik.pi = mixed.model.loglik.rtmb, + result1c <- gmjmcmc(x = df[, -1], y = df[, 1], , loglik.pi = mixed.model.loglik.rtmb, transforms = transforms, N.init = 30, probs = probs, params = params, P = 3) } @@ -249,13 +249,13 @@ c(time.lme4$callback_msg, time.inla$callback_msg, time.rtmb$callback_msg) set.seed(20062024) params$feat$pop.max = 10 -result2a <- gmjmcmc.parallel(runs = 40, cores = 10, data = df, loglik.pi = mixed.model.loglik.lme4, transforms = transforms, N.init=100, probs = probs, params = params, P = 25) +result2a <- gmjmcmc.parallel(runs = 40, cores = 10, x = df[, -1], y = df[, 1], loglik.pi = mixed.model.loglik.lme4, transforms = transforms, N.init=100, probs = probs, params = params, P = 25) summary(result2a,tol = 0.05,labels=names(df)[-1]) set.seed(21062024) -result2b <- gmjmcmc.parallel(runs = 120, cores = 40, data = df, loglik.pi = mixed.model.loglik.lme4, transforms = transforms, N.init=100, probs = probs, params = params, P = 25) +result2b <- gmjmcmc.parallel(runs = 120, cores = 40, x = df[, -1], y = df[, 1], loglik.pi = mixed.model.loglik.lme4, transforms = transforms, N.init=100, probs = probs, params = params, P = 25) summary(result2b, labels = names(df)[-1]) @@ -267,7 +267,7 @@ plot(result2b) set.seed(03072024) -result2c <- gmjmcmc.parallel(runs = 200, cores = 40, data = df, loglik.pi = mixed.model.loglik.lme4, transforms = transforms, N.init=100, probs = probs, params = params, P = 25) +result2c <- gmjmcmc.parallel(runs = 200, cores = 40, x = df[, -1], y = df[, 1], loglik.pi = mixed.model.loglik.lme4, transforms = transforms, N.init=100, probs = probs, params = params, P = 25) summary(result2c, labels = names(df)[-1]) summary(result2c, labels = names(df)[-1], pop = "last") @@ -289,7 +289,7 @@ summary(result2c, labels = names(df)[-1]) set.seed(22052024) params$mlpost$INLA.num.threads = 1 # Number of threads used by INLA set to 1 -result2a <- gmjmcmc.parallel(runs = 20, cores = 20, data = df, loglik.pi = mixed.model.loglik.inla, transforms = transforms, N.init=30, probs = probs, params = params, P = 25) +result2a <- gmjmcmc.parallel(runs = 20, cores = 20, x = df[, -1], y = df[, 1], loglik.pi = mixed.model.loglik.inla, transforms = transforms, N.init=30, probs = probs, params = params, P = 25) plot(result2a) summary(result2a, labels = names(df)[-1]) @@ -301,7 +301,7 @@ params$feat$check.col = F set.seed(20062024) params$mlpost$INLA.num.threads = 1 # Number of threads used by INLA set to 1 -result2b <- gmjmcmc.parallel(runs = 100, cores = 20, data = df, loglik.pi = mixed.model.loglik.inla, transforms = transforms, N.init=16, probs = probs, params = params, P = 15) +result2b <- gmjmcmc.parallel(runs = 100, cores = 20, x = df[, -1], y = df[, 1], loglik.pi = mixed.model.loglik.inla, transforms = transforms, N.init=16, probs = probs, params = params, P = 15) summary(result2b, labels = names(df)[-1]) diff --git a/tests_current/Ex11_Sec6_3.R b/tests_current/Ex11_Sec6_3.R index 771ef79..ae2337c 100644 --- a/tests_current/Ex11_Sec6_3.R +++ b/tests_current/Ex11_Sec6_3.R @@ -35,7 +35,7 @@ transforms <- c("p0","p2","p3","p05","pm05","pm1","pm2","p0p0","p0p05","p0p1","p probs <- gen.probs.gmjmcmc(transforms) probs$gen <- c(1,1,0,1) # Only modifications! -params <- gen.params.gmjmcmc(df) +params <- gen.params.gmjmcmc(ncol(df) - 1) params$feat$D <- 2 # Set depth of features to 2 (allow for interactions) params$mlpost$r = 1/dim(df)[1] @@ -101,7 +101,7 @@ if (use.fbms) { result <- fbms(data = df, family = "custom", loglik.pi = poisson.loglik.inla, method = "gmjmcmc", transforms = transforms, probs = probs, params = params, P=3) } else { - result <- gmjmcmc(data = df, loglik.pi = poisson.loglik.inla, transforms = transforms, + result <- gmjmcmc(x = df[, -1], y = df[, 1], , loglik.pi = poisson.loglik.inla, transforms = transforms, probs = probs, params = params, P = 3) } @@ -119,7 +119,7 @@ if (use.fbms) { method = "gmjmcmc.parallel", runs = 40, cores = 40, transforms = transforms, probs = probs, params = params, P=25) } else { - result2 <- gmjmcmc.parallel(runs = 40, cores = 40, data = df, loglik.pi = poisson.loglik.inla, + result2 <- gmjmcmc.parallel(runs = 40, cores = 40, x = df[, -1], y = df[, 1], , loglik.pi = poisson.loglik.inla, transforms = transforms, probs = probs, params = params, P = 25) } time.inla = toc() diff --git a/tests_current/Ex12_Sec6_4.R b/tests_current/Ex12_Sec6_4.R index 3eb5e4b..298cceb 100644 --- a/tests_current/Ex12_Sec6_4.R +++ b/tests_current/Ex12_Sec6_4.R @@ -36,7 +36,7 @@ n = dim(df)[1] p = dim(df)[2] - 1 -params <- gen.params.gmjmcmc(data = df) +params <- gen.params.gmjmcmc(ncol(df) - 1) params$mlpost$r = 0.5 params$mlpost$subs = 0.01 @@ -82,14 +82,14 @@ logistic.posterior.bic.irlssgd <- function (y, x, model, complex, params) set.seed(100001) tic() # subsampling analysis -tmp1 <- gmjmcmc(df, logistic.posterior.bic.irlssgd, transforms = transforms, +tmp1 <- gmjmcmc(x = df[, -1], y = df[, 1], logistic.posterior.bic.irlssgd, transforms = transforms, params = params, P = 2, sub = T) time1 = toc() set.seed(100002) tic() # regular analysis -tmp2 <- gmjmcmc(df, logistic.loglik, transforms = transforms, +tmp2 <- gmjmcmc(x = df[, -1], y = df[, 1], logistic.loglik, transforms = transforms, params = params, P = 2) time2 = toc() @@ -107,7 +107,7 @@ c(time1, time2) set.seed(100003) tic() -result <- gmjmcmc.parallel(runs = 10,cores = 10, data = df, +result <- gmjmcmc.parallel(runs = 10,cores = 10, x = df[, -1], y = df[, 1], loglik.pi = logistic.posterior.bic.irlssgd, transforms = transforms, params = params, P = 3, sub = T) time3 = toc() @@ -120,7 +120,7 @@ summary(result) set.seed(100004) tic() -result1a <- gmjmcmc.parallel(runs = 10,cores = 10, data = df, +result1a <- gmjmcmc.parallel(runs = 10,cores = 10, x = df[, -1], y = df[, 1], loglik.pi = logistic.loglik, transforms = transforms, params = params, P = 3) time4 = toc() @@ -139,7 +139,7 @@ summary(result1a) set.seed(100005) tic() -result2 <- gmjmcmc.parallel(runs = 40,cores = 40, data = df, +result2 <- gmjmcmc.parallel(runs = 40,cores = 40, x = df[, -1], y = df[, 1], loglik.pi = logistic.posterior.bic.irlssgd, transforms = transforms, params = params, P = 10, sub = T) time5 = toc() @@ -153,7 +153,7 @@ summary(result2) set.seed(100006) tic() -result2a <- gmjmcmc.parallel(runs = 40,cores = 40, data = df, +result2a <- gmjmcmc.parallel(runs = 40,cores = 40, x = df[, -1], y = df[, 1], loglik.pi = logistic.loglik, transforms = transforms, params = params, P = 10) time6 = toc() diff --git a/tests_current/Ex13_Sec6_5.R b/tests_current/Ex13_Sec6_5.R index c441264..99521e0 100644 --- a/tests_current/Ex13_Sec6_5.R +++ b/tests_current/Ex13_Sec6_5.R @@ -43,7 +43,7 @@ df.test <- df[-train,] time <- df.train$time -params <- gen.params.gmjmcmc(data = df.train[,-1]) +params <- gen.params.gmjmcmc(ncol(df.train) - 1) params$mlpost$r = 0.5 params$mlpost$time = time #the time variable goes into the params structure @@ -83,7 +83,7 @@ surv.pseudo.loglik = function(y, x, model, complex, params){ #Single chain analysis (just to illustrate that it works) set.seed(5) -result <- gmjmcmc(data = df.train[,-1], loglik.pi = surv.pseudo.loglik, transforms = transforms, params = params, P = 5) +result <- gmjmcmc(x = df.train[, -(1:2)], y = df.train[, 2], loglik.pi = surv.pseudo.loglik, transforms = transforms, params = params, P = 5) summary(result,tol = 0.01,labels = names(df.train)[-c(1:2)],effects = c(0.025,0.5,0.975)) summary(result) @@ -97,7 +97,7 @@ linpreds <- predict(result,df.test[,-(1:2)], link = function(x) x) #Parallel version set.seed(15) probs$gen <- c(1,1,1,1) -result2 <- gmjmcmc.parallel(runs = 80, cores = 40, data = df.train[,-1], +result2 <- gmjmcmc.parallel(runs = 80, cores = 40, x = df.train[, -(1:2)], y = df.train[, 2], loglik.pi = surv.pseudo.loglik, transforms = transforms, probs = probs, params = params, P = 25) summary(result2,tol = 0.01,labels = names(df.train)[-1],effects = c(0.025,0.5,0.975)) @@ -112,7 +112,7 @@ linpreds2 <- predict(result2,df.test[,-(1:2)], link = function(x) x) #Parallel version only linear terms set.seed(25) probs$gen <- c(0,0,0,1) -result3 <- gmjmcmc.parallel(runs = 80, cores = 40, data = df.train[,-1], +result3 <- gmjmcmc.parallel(runs = 80, cores = 40, x = df.train[, -(1:2)], y = df.train[, 2], loglik.pi = surv.pseudo.loglik, transforms = transforms, probs = probs, params = params, P = 25) @@ -126,7 +126,7 @@ linpreds3 <- predict(result3,df.test[,-(1:2)], link = function(x) x) #Parallel version only fractional polynomials set.seed(35) probs$gen <- c(0,1,0,1) -result4 <- gmjmcmc.parallel(runs = 80, cores = 40, data = df.train[,-1], +result4 <- gmjmcmc.parallel(runs = 80, cores = 40, x = df.train[, -(1:2)], y = df.train[, 2], loglik.pi = surv.pseudo.loglik, transforms = transforms, probs = probs, params = params, P = 25) diff --git a/tests_current/Ex1_Sec3.R b/tests_current/Ex1_Sec3.R index 595af57..39f3d6b 100644 --- a/tests_current/Ex1_Sec3.R +++ b/tests_current/Ex1_Sec3.R @@ -36,7 +36,7 @@ use.fbms = FALSE # #################################################### -params <- gen.params.gmjmcmc(df.train) +params <- gen.params.gmjmcmc(ncol(df.train) - 1) params$mlpost$var <- 1 if (use.fbms) { @@ -101,7 +101,7 @@ if (use.fbms) { result_parallel_unitphi <- fbms(data = df.train, method = "gmjmcmc.parallel", transforms = transforms, runs = 40, cores = 10, P=25,params = params) } else { - result_parallel_unitphi <- gmjmcmc.parallel(runs = 40, cores = 10, data = df.train, loglik.pi = gaussian.loglik, + result_parallel_unitphi <- gmjmcmc.parallel(runs = 40, cores = 10, x = df.train[, -1], y = df.train[, 1], loglik.pi = gaussian.loglik, transforms = transforms, P = 25, params = params) } summary(result_parallel_unitphi, tol = 0.01) @@ -138,10 +138,10 @@ params$mlpost$r <- 1/dim(df.train)[1] params$mlpost$alpha <- dim(df.train)[1] set.seed(1) if (use.fbms) { - result_parallel_g <- fbms(data = df.train,family = "custom", method = "gmjmcmc.parallel",loglik.pi = lm.logpost.bas, transforms = transforms, + result_parallel_g <- fbms(data = df.train, family = "custom", method = "gmjmcmc.parallel",loglik.pi = lm.logpost.bas, transforms = transforms, runs = 40, cores = 10, P=25,params = params) } else { - result_parallel_g <- gmjmcmc.parallel(runs = 40, cores = 10, data = df.train, loglik.pi = lm.logpost.bas, + result_parallel_g <- gmjmcmc.parallel(runs = 40, cores = 10, x = df.train[, -1], y = df.train[, 1], loglik.pi = lm.logpost.bas, transforms = transforms, P=25,params = params) } summary(result_parallel_g, tol = 0.01) @@ -298,7 +298,7 @@ for(prior in c("g-prior", #default for N.final = N.init -params <- gen.params.gmjmcmc(df.train) +params <- gen.params.gmjmcmc(ncol(df.train) - 1) params$mlpost$g <- dim(df.train)[1] tic() result.default <- fbms(formula = semimajoraxis ~ 1 + . , data = df.train, method = "gmjmcmc.parallel",cores = 10, runs = 10, transforms = transforms, loglik.pi = gaussian.loglik.g, params = params, P = 50) diff --git a/tests_current/Ex2_Sec4_1.R b/tests_current/Ex2_Sec4_1.R index af8c456..5592bdd 100644 --- a/tests_current/Ex2_Sec4_1.R +++ b/tests_current/Ex2_Sec4_1.R @@ -84,7 +84,7 @@ set.seed(123) probs.lin <- gen.probs.mjmcmc() -params.lin <- gen.params.mjmcmc(df) +params.lin <- gen.params.mjmcmc(ncol(df) - 1) params.lin$loglik$r <- 1/dim(df)[1] #to set variance to unknown uncomment below diff --git a/tests_current/Ex3_Sec4_1.R b/tests_current/Ex3_Sec4_1.R index d08c144..07edbf0 100644 --- a/tests_current/Ex3_Sec4_1.R +++ b/tests_current/Ex3_Sec4_1.R @@ -32,7 +32,7 @@ probs$gen = c(0,0,0,1) # Candidates for the first MJMCMC round based on correlation with response c.vec = unlist(mclapply(2:ncol(df), function(x)abs(cor(df[,1],df[,x])))) ids = sort(order(c.vec,decreasing=TRUE)[1:50]) -params = gen.params.gmjmcmc(df) +params = gen.params.gmjmcmc(ncol(df) - 1) params$feat$prel.filter <- ids params$feat$check.col <- T @@ -87,7 +87,7 @@ if(run.parallel) P=50,N.init=1000,N.final=1000,runs=10,cores=10) }else { start = Sys.time() - result_parallel1=gmjmcmc.parallel(data=df,loglik.pi=gaussian.loglik.g,transforms=transforms, + result_parallel1=gmjmcmc.parallel(x = df[, -1], y = df[, 1], loglik.pi=gaussian.loglik.g,transforms=transforms, probs=probs,params=params, P=50,N.init=1000,N.final=1000,runs=10,cores=10) end = Sys.time() @@ -103,7 +103,7 @@ if(run.parallel) method="gmjmcmc.parallel", P=50,N.init=1000,N.final=1000,runs=10,cores=10) } else { - result_parallel2=gmjmcmc.parallel(data=df,loglik.pi=gaussian.loglik.g,transforms=transforms, + result_parallel2=gmjmcmc.parallel(x = df[, -1], y = df[, 1], loglik.pi=gaussian.loglik.g,transforms=transforms, probs=probs,params=params, P=50,N.init=1000,N.final=1000,runs=10,cores=10) } @@ -117,7 +117,7 @@ if(run.parallel) method="gmjmcmc.parallel", P=50,N.init=1000,N.final=1000,runs=10,cores=10) } else { - result_parallel3=gmjmcmc.parallel(data=df,loglik.pi=gaussian.loglik.g,transforms=transforms, + result_parallel3=gmjmcmc.parallel(x = df[, -1], y = df[, 1], loglik.pi=gaussian.loglik.g,transforms=transforms, probs=probs,params=params, P=50,N.init=1000,N.final=1000,runs=10,cores=10) } diff --git a/tests_current/Ex3_Sec4_1_Removed, replaced_by_Ex7.R b/tests_current/Ex3_Sec4_1_Removed, replaced_by_Ex7.R index 558347b..7f60d1a 100644 --- a/tests_current/Ex3_Sec4_1_Removed, replaced_by_Ex7.R +++ b/tests_current/Ex3_Sec4_1_Removed, replaced_by_Ex7.R @@ -32,7 +32,7 @@ ids = sort(order(c.vec,decreasing=TRUE)[1:50]) # #################################################### -params = gen.params.gmjmcmc(df) +params = gen.params.gmjmcmc(ncol(df) - 1) params$feat$check.col <- F params$feat$pop.max = 60 params$prel.select <- ids @@ -50,7 +50,7 @@ if (use.fbms) { result1 <- fbms(data = df, method = "gmjmcmc", transforms = transforms, probs = probs, params = params, P=25) } else { - result1 = gmjmcmc(data = df, transforms = transforms, + result1 = gmjmcmc(x = df[, -1], y = df[, 1], transforms = transforms, probs = probs, params = params, P=25) } show(Sys.time()-start.time) @@ -64,7 +64,7 @@ if (use.fbms) { result2 <- fbms(data = df, method = "gmjmcmc", transforms = transforms, probs = probs, params = params, P=25) } else { - result2 = gmjmcmc(data = df, transforms = transforms, + result2 = gmjmcmc(x = df[, -1], y = df[, 1], transforms = transforms, probs = probs, params = params, P=25) } @@ -108,7 +108,7 @@ min(cor(X.best)) ids3 = ids transforms = c("") -params = gen.params.gmjmcmc(df[,ids3]) +params = gen.params.gmjmcmc(ncol(df[,ids3]) - 1) params$feat$check.col <- F params$feat$pop.max = 60 params$prel.select <- ids3 @@ -123,7 +123,7 @@ if (use.fbms) { result3 <- fbms(data = df, method = "gmjmcmc", transforms = transforms, probs = probs, params = params, P=25) } else { - result3 = gmjmcmc(data = df, transforms = transforms, + result3 = gmjmcmc(x = df[, -1], y = df[, 1], transforms = transforms, probs = probs, params = params, P=25) } @@ -154,7 +154,7 @@ if (use.fbms) { transforms = transforms, probs = probs, params = params, P=25, N.init=500, N.final=500) } else { - result_parallel = gmjmcmc.parallel(runs = 40, cores = 40,data = df, + result_parallel = gmjmcmc.parallel(runs = 40, cores = 40, x = df[, -1], y = df[, 1], transforms = transforms, probs = probs, params = params, P=25, N.init=500, N.final=500) } @@ -187,7 +187,7 @@ if (use.fbms) { transforms = transforms, probs = probs, params = params, P=25, N.init=500, N.final=500) } else { - result_parallel2 = gmjmcmc.parallel(runs = 40, cores = 10,data = df, + result_parallel2 = gmjmcmc.parallel(runs = 40, cores = 10, x = df[, -1], y = df[, 1], transforms = transforms, probs = probs, params = params, P=25, N.init=500, N.final=500) } diff --git a/tests_current/Ex4_Sec4_2.R b/tests_current/Ex4_Sec4_2.R index b9e2d20..6e0c158 100644 --- a/tests_current/Ex4_Sec4_2.R +++ b/tests_current/Ex4_Sec4_2.R @@ -35,7 +35,7 @@ df <- as.data.frame(cbind(y, X)) transforms <- c("") -params <- gen.params.gmjmcmc(df) +params <- gen.params.gmjmcmc(ncol(df) - 1) #params$mlpost$var = "unknown" #this will set the variance to unknwon probs <- gen.probs.gmjmcmc(transforms) probs$gen <- c(1,0,0,1) #Include interactions and mutations @@ -51,7 +51,7 @@ if (use.fbms) { result <- fbms(data = df, method = "gmjmcmc", transforms = transforms, probs = probs, params = params, P=40) } else { - result <- gmjmcmc(df, transforms = transforms, params = params, probs = probs, P=40) + result <- gmjmcmc(x = df[, -1], y = df[, 1], transforms = transforms, params = params, probs = probs, P=40) } summary(result) @@ -61,7 +61,7 @@ if (use.fbms) { result2 <- fbms(data = df, method = "gmjmcmc", transforms = transforms, probs = probs, params = params, P=40) } else { - result2 <- gmjmcmc(df, transforms = transforms, N.init = 1000, N.final = 5000, + result2 <- gmjmcmc(x = df[, -1], y = df[, 1], transforms = transforms, N.init = 1000, N.final = 5000, probs = probs, params = params, P = 40) } summary(result2, tol = 0.01) @@ -81,7 +81,7 @@ if (use.fbms) { runs = 40, cores = 10, probs = probs, params = params, P=25) } else { - result_parallel = gmjmcmc.parallel(runs = 40, cores = 10, data = df, + result_parallel = gmjmcmc.parallel(runs = 40, cores = 10, x = df[, -1], y = df[, 1], transforms = transforms, probs = probs, params = params, P=25) } @@ -97,7 +97,7 @@ if (use.fbms) { runs = 40, cores = 10, N.init=1000, N.final=2000, probs = probs, params = params, P=25) } else { - result_parallel2 = gmjmcmc.parallel(runs = 40, cores = 10, data = df, + result_parallel2 = gmjmcmc.parallel(runs = 40, cores = 10, x = df[, -1], y = df[, 1], transforms = transforms, probs = probs, params = params, P=25, N.init=1000, N.final=2000) } @@ -120,7 +120,7 @@ set.seed(123) if (use.fbms) { result.lin <- fbms(data = df, N = 5000) } else { - result.lin <- mjmcmc(df, gaussian.loglik, N = 5000) + result.lin <- mjmcmc(x = df[, -1], y = dt[, 1], gaussian.loglik, N = 5000) } plot(result.lin) diff --git a/tests_current/Ex5_Sec4_3.R b/tests_current/Ex5_Sec4_3.R index 0124956..c085ef8 100644 --- a/tests_current/Ex5_Sec4_3.R +++ b/tests_current/Ex5_Sec4_3.R @@ -40,7 +40,7 @@ df$y = rnorm(n =n, mean = mu,sd = 1) transforms <- c("p0","p2","p3","p05","pm05","pm1","pm2","p0p0","p0p05","p0p1","p0p2","p0p3","p0p05","p0pm05","p0pm1","p0pm2") probs <- gen.probs.gmjmcmc(transforms) probs$gen <- c(0,1,0,1) # Only modifications! -params <- gen.params.gmjmcmc(df) +params <- gen.params.gmjmcmc(ncol(df) - 1) params$feat$D <- 1 # Set depth of features to 1 #to set variance to unknown uncomment below @@ -58,7 +58,7 @@ if (use.fbms) { result <- fbms(data = df, method = "gmjmcmc", transforms = transforms, probs = probs, params = params) } else { - result <- gmjmcmc(df, transforms = transforms, probs = probs, params = params) + result <- gmjmcmc(x = df[, -1], y = df[, 1], transforms = transforms, probs = probs, params = params) } summary(result, labels = names(df[-1])) #plot.diagn(result,FUN = median) @@ -77,7 +77,7 @@ if (use.fbms) { result_parallel <- fbms(data = df, method = "gmjmcmc.parallel", transforms = transforms, probs = probs, params = params, P=25) } else { - result_parallel = gmjmcmc.parallel(runs = 40, cores = 10, data = df, + result_parallel = gmjmcmc.parallel(runs = 40, cores = 10, x = df[, -1], y = df[, 1], transforms = transforms, probs = probs, params = params, P=25) } #summary(result_parallel, labels = names(df[-1])) @@ -94,7 +94,7 @@ if (use.fbms) { result_parallel2 <- fbms(runs = 40, cores = 10,data = df, method = "gmjmcmc.parallel", transforms = transforms, probs = probs, params = params, P=25, N.init=1000, N.final=2000) } else { - result_parallel2 = gmjmcmc.parallel(runs = 40, cores = 10,data = df, + result_parallel2 = gmjmcmc.parallel(runs = 40, cores = 10, x = df[, -1], y = df[, 1], transforms = transforms, probs = probs, params = params, P=25, N.init=1000, N.final=2000) } @@ -111,7 +111,7 @@ if (use.fbms) { result_parallel3 <- fbms(data = df, method = "gmjmcmc.parallel", transforms = transforms, probs = probs, params = params, P=50, N.init=2000, N.final=4000) } else { - result_parallel3 = gmjmcmc.parallel(runs = 40, cores = 40, data = df, transforms = transforms, + result_parallel3 = gmjmcmc.parallel(runs = 40, cores = 40, x = df[, -1], y = df[, 1], transforms = transforms, probs = probs, params = params, P=50, N.init=2000, N.final=4000) } #summary(result_parallel3, labels = names(df[-1])) diff --git a/tests_current/Ex6_Sec4_4.R b/tests_current/Ex6_Sec4_4.R index 034c0cc..81e8b45 100644 --- a/tests_current/Ex6_Sec4_4.R +++ b/tests_current/Ex6_Sec4_4.R @@ -65,7 +65,7 @@ transforms <- c("sigmoid") probs <- gen.probs.gmjmcmc(transforms) probs$gen <- c(0,0,1,1) #Only projections! -params <- gen.params.gmjmcmc(df.training) +params <- gen.params.gmjmcmc(ncol(df.training) - 1) #params$mlpost$r = 0.9 #params$mlpost$var = "unknown" @@ -83,7 +83,7 @@ if (use.fbms) { result <- fbms(data = df.training, method = "gmjmcmc", transforms = transforms, probs = probs, params = params) } else { - result <- gmjmcmc(df.training, transforms = transforms, probs = probs,params = params) + result <- gmjmcmc(x = df.training[, -1], y = df.training[, 1], transforms = transforms, probs = probs,params = params) } summary(result) @@ -110,7 +110,7 @@ if (use.fbms) { result_parallel <- fbms(data = df.training, method = "gmjmcmc.parallel", runs = 4, cores = 4, transforms = transforms, probs = probs, params = params, P=25) } else { - result_parallel = gmjmcmc.parallel(runs = 4, cores = 4, data = df.training, + result_parallel = gmjmcmc.parallel(runs = 4, cores = 4, x = df.training[, -1], y = df.training[, 1], loglik.pi =gaussian.loglik,loglik.alpha = gaussian.loglik.alpha, transforms = transforms, probs = probs, params = params, P=25) } @@ -143,7 +143,7 @@ if (use.fbms) { result.a3 <- fbms(data = df.training, method = "gmjmcmc", transforms = transforms, probs = probs, params = params) } else { - result.a3 <- gmjmcmc(df.training, transforms = transforms, probs = probs, params = params) + result.a3 <- gmjmcmc(x = df.training[, -1], y = df.training[, 1], transforms = transforms, probs = probs, params = params) } summary(result.a3) @@ -173,7 +173,7 @@ if (use.fbms) { result_parallel.a3 <- fbms(data = df.training, method = "gmjmcmc.parallel", runs = 40, cores = 40, transforms = transforms, probs = probs, params = params, P=25) } else { - result_parallel.a3 = gmjmcmc.parallel(runs = 40, cores = 40, data = df.training, + result_parallel.a3 = gmjmcmc.parallel(runs = 40, cores = 40, x = df.training[, -1], y = df.training[, 1], loglik.pi =gaussian.loglik,loglik.alpha = gaussian.loglik.alpha, transforms = transforms, probs = probs, params = params, P=25) } @@ -212,7 +212,7 @@ if (use.fbms) { result.fp <- fbms(data = df.training, method = "gmjmcmc.parallel", runs = 40, cores = 40, transforms = transforms, probs = probs, params = params, P=25) } else { - result.fp <- gmjmcmc.parallel(runs = 40, cores = 40, data = df.training, + result.fp <- gmjmcmc.parallel(runs = 40, cores = 40, x = df.training[, -1], y = df.training[, 1], loglik.pi =gaussian.loglik,loglik.alpha = gaussian.loglik.alpha, transforms = transforms, probs = probs, params = params, P=25) } diff --git a/tests_current/Ex8_Sec5.2_logic_regression.R b/tests_current/Ex8_Sec5.2_logic_regression.R index c736984..cf79e97 100644 --- a/tests_current/Ex8_Sec5.2_logic_regression.R +++ b/tests_current/Ex8_Sec5.2_logic_regression.R @@ -44,7 +44,7 @@ probs <- gen.probs.gmjmcmc(transforms) probs$gen <- c(1,1,0,1) #No projections allowed -params <- gen.params.gmjmcmc(df.training) +params <- gen.params.gmjmcmc(ncol(df.training) - 1) params$mlpost$p <- 50 #number of leaves params$feat$pop.max <- 31 params$feat$L <- 15 @@ -98,12 +98,12 @@ if (use.fbms) { } else { # result <- gmjmcmc(df.training, transforms = transforms, probs = probs) - result <- gmjmcmc(df.training, loglik.pi = estimate.logic.lm,N.init = 500,N.final = 500, , P = 25, + result <- gmjmcmc(x = df.training[, -1], y = df.training[, 1], loglik.pi = estimate.logic.lm,N.init = 500,N.final = 500, , P = 25, transforms = transforms,params = params, probs = probs) } summary(result) -mpm <- get.mpm.model(result,y = df.training$Y2,x = df.training[,-1],family = "custom", loglik.pi = estimate.logic.lm,params = params$mlpost) +mpm <- get.mpm.model(result, y = df.training$Y2, x = df.training[,-1], family = "custom", loglik.pi = estimate.logic.lm,params = params$mlpost) mbest <- get.best.model(result) @@ -147,12 +147,12 @@ if (use.fbms) { method = "gmjmcmc.parallel", runs = 16, cores = 8, transforms = transforms, probs = probs, params = params, P=25) } else { - result_parallel = gmjmcmc.parallel(runs = 16, cores = 8, data = df.training, + result_parallel = gmjmcmc.parallel(runs = 16, cores = 8, x = df.training[, -1], y = df.training[, 1], loglik.pi = estimate.logic.lm,N.init = 500,N.final = 500, transforms = transforms, probs = probs, params = params, P=25) } summary(result_parallel) -mpm <- get.mpm.model(result_parallel, y = df.training$Y2, x = df.training[,-1],family = "custom", loglik.pi = estimate.logic.lm,params = params$mlpost) +mpm <- get.mpm.model(result_parallel, y = df.training$Y2, x = df.training[,-1], family = "custom", loglik.pi = estimate.logic.lm,params = params$mlpost) mbest <- get.best.model(result_parallel) @@ -192,7 +192,7 @@ probs <- gen.probs.gmjmcmc(transforms) probs$gen <- c(1,1,0,1) #No projections allowed probs$filter <- 0.6 -params <- gen.params.gmjmcmc(df.training) +params <- gen.params.gmjmcmc(ncol(df.training) - 1) params$mlpost$p <- 50 #number of leaves params$mlpost$n <- n #used in specifying parameter v of the tCCH prior params$mlpost$p.a <- 1 @@ -207,7 +207,7 @@ estimate.logic.tcch = function(y, x, model, complex, params) { if (length(params) == 0) - params <- list(r = 1/dim(x)[1]) + params <- list(r = 1 / dim(x)[1]) suppressWarnings({ mod <- fastglm(as.matrix(x[, model]), y, family = gaussian()) }) @@ -248,12 +248,12 @@ if (use.fbms) { } else { # result <- gmjmcmc(df.training, transforms = transforms, probs = probs) - result <- gmjmcmc(df.training, loglik.pi = estimate.logic.tcch,N.init = 500,N.final = 500, P = 25, + result <- gmjmcmc(x = df.training[, -1], y = df.training[, 1], loglik.pi = estimate.logic.tcch,N.init = 500,N.final = 500, P = 25, transforms = transforms,params = params, probs = probs) } summary(result) -mpm <- get.mpm.model(result,y = df.training$Y2,x = df.training[,-1],family = "custom", loglik.pi = estimate.logic.lm,params = params$mlpost) +mpm <- get.mpm.model(result, y = df.training$Y2, x = df.training[,-1], family = "custom", loglik.pi = estimate.logic.lm,params = params$mlpost) mbest <- get.best.model(result) @@ -290,7 +290,7 @@ if (use.fbms) { method = "gmjmcmc.parallel", runs = 16, cores = 8, transforms = transforms, probs = probs, params = params, P=25) } else { - result_parallel = gmjmcmc.parallel(runs = 16, cores = 8, data = df.training, + result_parallel = gmjmcmc.parallel(runs = 16, cores = 8, x = df.training[, -1], y = df.training[, 1], loglik.pi = estimate.logic.tcch,N.init = 500,N.final = 500, transforms = transforms, probs = probs, params = params, P=25) } diff --git a/tests_current/Ex8_Sec5_2.R b/tests_current/Ex8_Sec5_2.R index 56768ef..c2a15ba 100644 --- a/tests_current/Ex8_Sec5_2.R +++ b/tests_current/Ex8_Sec5_2.R @@ -66,7 +66,7 @@ transforms <- c("sigmoid") probs <- gen.probs.gmjmcmc(transforms) probs$gen <- c(0,0,1,1) #Only projections! -params <- gen.params.gmjmcmc(df.training) +params <- gen.params.gmjmcmc(ncol(df.training) - 1) params$mlpost$r = 0.2 ############################################## @@ -116,7 +116,7 @@ if (use.fbms) { } else { # result <- gmjmcmc(df.training, transforms = transforms, probs = probs) - result <- gmjmcmc(df.training, loglik.pi = gaussian.loglik.gap.open.prior, + result <- gmjmcmc(x = df.training[, -1], y = df.training[, 1], loglik.pi = gaussian.loglik.gap.open.prior, transforms = transforms, probs = probs) } @@ -147,7 +147,7 @@ if (use.fbms) { method = "gmjmcmc.parallel", runs = 40, cores = 40, transforms = transforms, probs = probs, params = params, P=25) } else { - result_parallel = gmjmcmc.parallel(runs = 40, cores = 40, data = df.training, + result_parallel = gmjmcmc.parallel(runs = 40, cores = 40, x = df.training[, -1], y = df.training[, 1], loglik.pi = gaussian.loglik.gap.open.prior, loglik.alpha = gaussian.loglik.alpha, transforms = transforms, probs = probs, params = params, P=25) @@ -182,7 +182,7 @@ if (use.fbms) { method = "gmjmcmc", transforms = transforms, probs = probs, params = params) } else { - result.a3 <- gmjmcmc(df.training, transforms = transforms, probs = probs, params = params) + result.a3 <- gmjmcmc(x = df.training[, -1], y = df.training[, 1], transforms = transforms, probs = probs, params = params) } summary(result.a3) @@ -214,7 +214,7 @@ if (use.fbms) { method = "gmjmcmc.parallel", runs = 40, cores = 40, transforms = transforms, probs = probs, params = params, P=25) } else { - result_parallel.a3 = gmjmcmc.parallel(runs = 40, cores = 40, data = df.training, + result_parallel.a3 = gmjmcmc.parallel(runs = 40, cores = 40, x = df.training[, -1], y = df.training[, 1], loglik.pi = gaussian.loglik.gap.open.prior, loglik.alpha = gaussian.loglik.alpha, transforms = transforms, probs = probs, params = params, P=25) @@ -252,7 +252,7 @@ if (use.fbms) { result.fp <- fbms(data = df.training, method = "gmjmcmc.parallel", runs = 40, cores = 40, transforms = transforms, probs = probs, params = params, P=25) } else { - result.fp <- gmjmcmc.parallel(runs = 40, cores = 40, data = df.training, + result.fp <- gmjmcmc.parallel(runs = 40, cores = 40, x = df.training[, -1], y = df.training[, 1], loglik.pi =gaussian.loglik,loglik.alpha = gaussian.loglik.alpha, transforms = transforms, probs = probs, params = params, P=25) } diff --git a/tests_current/Ex9_Sec6_1.R b/tests_current/Ex9_Sec6_1.R index b64b43a..6a20fc5 100644 --- a/tests_current/Ex9_Sec6_1.R +++ b/tests_current/Ex9_Sec6_1.R @@ -38,7 +38,7 @@ transforms <- c("sigmoid","sin_deg","exp_dbl","p0","troot","to3") probs <- gen.probs.gmjmcmc(transforms) probs$gen <- c(1,1,1,1) -params <- gen.params.gmjmcmc(df) +params <- gen.params.gmjmcmc(ncol(df) - 1) params$feat$check.col <- F #################################################### # @@ -79,7 +79,7 @@ if (use.fbms) { result <- fbms(data = df, method = "gmjmcmc", family = "binomial", transforms = transforms, probs = probs, params = params) } else { - result <- gmjmcmc(df, logistic.loglik, transforms + result <- gmjmcmc(x = df[, -1], y = df[, 1], logistic.loglik, transforms = transforms, probs = probs, params = params) } # Default tuning parameters for logistic regression: @@ -125,7 +125,7 @@ if (use.fbms) { runs = 40, cores = 40, transforms = transforms, probs = probs, params = params, P=25) } else { - result_parallel = gmjmcmc.parallel(runs = 40, cores = 40, data = df, + result_parallel = gmjmcmc.parallel(runs = 40, cores = 40, x = df[, -1], y = df[, 1], loglik.pi = logistic.loglik, transforms = transforms, probs = probs, params = params, P=25) } -- GitLab From 8724fee2a0ca1a5220fc2bb7c554dbeb6dcc64e7 Mon Sep 17 00:00:00 2001 From: jonlachmann Date: Fri, 11 Apr 2025 21:18:07 +0200 Subject: [PATCH 36/36] * CRAN fixes. --- R/arguments.R | 6 +++--- README.md | 4 ++-- man/fbms.Rd | 2 +- man/gen.params.gmjmcmc.Rd | 6 +++--- man/gen.params.mjmcmc.Rd | 4 ++-- man/mjmcmc.Rd | 4 ++-- 6 files changed, 13 insertions(+), 13 deletions(-) diff --git a/R/arguments.R b/R/arguments.R index 6afed00..82a685d 100644 --- a/R/arguments.R +++ b/R/arguments.R @@ -121,7 +121,7 @@ gen.probs.gmjmcmc <- function (transforms) { #' Generate a parameter list for MJMCMC (Mode Jumping MCMC) #' -#' @param data The dataset that will be used in the algorithm +#' @param ncov The number of covariates in the dataset that will be used in the algorithm #' #' @return A list of parameters to use when running the mjmcmc function. #' @@ -220,7 +220,7 @@ gen.params.mjmcmc <- function (ncov) { #' #' This function generates the full list of parameters required for the Generalized Mode Jumping Markov Chain Monte Carlo (GMJMCMC) algorithm, building upon the parameters from \code{gen.params.mjmcmc}. The generated parameter list includes feature generation settings, population control parameters, and optimization controls for the search process. #' -#' @param data A data frame containing the dataset with covariates and response variable. +#' @param ncov The number of covariates in the dataset that will be used in the algorithm #' @return A list of parameters for controlling GMJMCMC behavior: #' #' @section Feature Generation Parameters (\code{feat}): @@ -303,7 +303,7 @@ gen.params.mjmcmc <- function (ncov) { #' #' @examples #' data <- data.frame(y = rnorm(100), x1 = rnorm(100), x2 = rnorm(100)) -#' params <- gen.params.gmjmcmc(data) +#' params <- gen.params.gmjmcmc(ncol(data) - 1) #' str(params) #' #' @seealso \code{\link{gen.params.mjmcmc}}, \code{\link{gmjmcmc}} diff --git a/README.md b/README.md index 08b826d..a32b6ff 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ [![](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html#experimental) -[![](https://img.shields.io/github/last-commit/jonlachmann/GMJMCMC.svg)](https://github.com/jonlachmann/GMJMCMC/commits/master) +[![](https://img.shields.io/github/last-commit/jonlachmann/GMJMCMC.svg)](https://github.com/jonlachmann/GMJMCMC/commits/data-inputs) [![](https://img.shields.io/github/languages/code-size/jonlachmann/GMJMCMC.svg)](https://github.com/jonlachmann/GMJMCMC) [![R build status](https://github.com/jonlachmann/GMJMCMC/workflows/R-CMD-check/badge.svg)](https://github.com/jonlachmann/GMJMCMC/actions) -[![codecov](https://codecov.io/gh/jonlachmann/GMJMCMC/branch/FBMS/graph/badge.svg)](https://codecov.io/gh/jonlachmann/GMJMCMC) +[![codecov](https://codecov.io/gh/jonlachmann/GMJMCMC/branch/data-inputs/graph/badge.svg)](https://codecov.io/gh/jonlachmann/GMJMCMC) [![License: GPL](https://img.shields.io/badge/license-GPL-blue.svg)](https://cran.r-project.org/web/licenses/GPL) # FBMS - Flexible Bayesian Model Selection diff --git a/man/fbms.Rd b/man/fbms.Rd index ea3b282..210b1df 100644 --- a/man/fbms.Rd +++ b/man/fbms.Rd @@ -8,7 +8,7 @@ Or Fit a BGLM model using Modified Mode Jumping Markov Chain Monte Carlo (MCMC) fbms( formula = NULL, family = "gaussian", - beta_prior = list(type = "robust"), + beta_prior = list(type = "g-prior"), model_prior = NULL, data = NULL, impute = FALSE, diff --git a/man/gen.params.gmjmcmc.Rd b/man/gen.params.gmjmcmc.Rd index 945cd64..611c747 100644 --- a/man/gen.params.gmjmcmc.Rd +++ b/man/gen.params.gmjmcmc.Rd @@ -4,10 +4,10 @@ \alias{gen.params.gmjmcmc} \title{Generate a parameter list for GMJMCMC (Genetically Modified MJMCMC)} \usage{ -gen.params.gmjmcmc(data) +gen.params.gmjmcmc(ncov) } \arguments{ -\item{data}{A data frame containing the dataset with covariates and response variable.} +\item{ncov}{The number of covariates in the dataset that will be used in the algorithm} } \value{ A list of parameters for controlling GMJMCMC behavior: @@ -101,7 +101,7 @@ This function generates the full list of parameters required for the Generalized \examples{ data <- data.frame(y = rnorm(100), x1 = rnorm(100), x2 = rnorm(100)) -params <- gen.params.gmjmcmc(data) +params <- gen.params.gmjmcmc(ncol(data) - 1) str(params) } diff --git a/man/gen.params.mjmcmc.Rd b/man/gen.params.mjmcmc.Rd index 0fe0b17..67f217b 100644 --- a/man/gen.params.mjmcmc.Rd +++ b/man/gen.params.mjmcmc.Rd @@ -4,10 +4,10 @@ \alias{gen.params.mjmcmc} \title{Generate a parameter list for MJMCMC (Mode Jumping MCMC)} \usage{ -gen.params.mjmcmc(data) +gen.params.mjmcmc(ncov) } \arguments{ -\item{data}{The dataset that will be used in the algorithm} +\item{ncov}{The number of covariates in the dataset that will be used in the algorithm} } \value{ A list of parameters to use when running the mjmcmc function. diff --git a/man/mjmcmc.Rd b/man/mjmcmc.Rd index 0b16005..064f3f5 100644 --- a/man/mjmcmc.Rd +++ b/man/mjmcmc.Rd @@ -8,8 +8,8 @@ mjmcmc( x, y, loglik.pi = fbms.mlik.master, - mlpost_params = list(family = "gaussian", beta_prior = list(type = "robust")), - intercept = FALSE, + mlpost_params = list(family = "gaussian", beta_prior = list(type = "g-prior")), + intercept = TRUE, fixed = 0, N = 100, probs = NULL, -- GitLab