@@ -733,31 +733,51 @@ ModelArray.gam <- function(formula, data, phenotypes, scalar,
733733# ' Run a user-supplied function for element-wise data
734734# '
735735# ' @description
736- # ' `ModelArray.wrap` runs a user-supplied function \code{FUN} at each
737- # ' requested element and returns a tibble of results combined across
738- # ' elements.
736+ # ' `ModelArray.gam` fits a generalized additive model at each requested
737+ # ' element in a \linkS4class{ModelArray} and returns a tibble of requested
738+ # ' model statistics. There is no model-level p-value for GAMs, so there is
739+ # ' no \code{correct.p.value.model} argument.
739740# '
740741# ' @details
741- # ' This provides a generic framework reusing ModelArray's per-element
742- # ' looping, alignment, subject-thresholding, and parallelization. The user
743- # ' function is called as \code{FUN(data = dat, ...)} where \code{dat} is
744- # ' \code{phenotypes} with all scalar columns appended for the current
745- # ' element. The return value from \code{FUN} for a single element must be
746- # ' one of:
742+ # ' You may request returning specific statistical variables by setting
743+ # ' \code{var.*}, or you can get all by setting \code{full.outputs = TRUE}.
744+ # ' Note that statistics covered by \code{full.outputs} or \code{var.*} are
745+ # ' the ones from \code{broom::tidy()}, \code{broom::glance()}, and
746+ # ' \code{summary.gam()} only, and do not include corrected p-values.
747+ # ' However, FDR-corrected p-values (\code{"fdr"}) are generated by default.
748+ # '
749+ # ' List of acceptable statistic names for each of \code{var.*}:
747750# ' \itemize{
748- # ' \item a one-row \code{data.frame} or \code{tibble}
749- # ' \item a named list
750- # ' \item a named atomic vector
751+ # ' \item \code{var.smoothTerms}: \code{c("edf", "ref.df", "statistic",
752+ # ' "p.value")}; From \code{broom::tidy(parametric = FALSE)}.
753+ # ' \item \code{var.parametricTerms}: \code{c("estimate", "std.error",
754+ # ' "statistic", "p.value")}; From \code{broom::tidy(parametric = TRUE)}.
755+ # ' \item \code{var.model}: \code{c("adj.r.squared", "dev.expl",
756+ # ' "sp.criterion", "scale", "df", "logLik", "AIC", "BIC", "deviance",
757+ # ' "df.residual", "nobs")}; From \code{broom::glance()} and
758+ # ' \code{\link[mgcv]{summary.gam}}.
751759# ' }
752- # ' The column names from the first successful element determine the final
753- # ' schema.
754760# '
755- # ' Note: \code{ModelArray.wrap} never performs any p-value corrections or
756- # ' modifications. If you need adjusted p-values (e.g. FDR), implement
757- # ' them inside \code{FUN}.
761+ # ' Smooth term names in the output are normalized: \code{s(age)} becomes
762+ # ' \code{s_age}, \code{ti(x,z)} becomes \code{ti_x_z}, and
763+ # ' \code{s(x):oFactor} becomes \code{s_x_BYoFactor}.
758764# '
759- # ' Use \code{\link{exampleElementData}} to construct a sample per-element
760- # ' data.frame for testing your function before committing to a full run.
765+ # ' For p-value corrections (arguments \code{correct.p.value.*}), supported
766+ # ' methods include all methods in \code{p.adjust.methods} except
767+ # ' \code{"none"}. You can request more than one method. FDR-corrected
768+ # ' p-values (\code{"fdr"}) are calculated by default. To turn a correction
769+ # ' off, set the corresponding argument to \code{"none"}.
770+ # '
771+ # ' When \code{changed.rsq.term.index} is provided, a reduced model (dropping
772+ # ' the specified term) is fit at each element to compute delta adjusted
773+ # ' R-squared and partial R-squared. This approximately doubles execution
774+ # ' time per requested term. The term index refers to the position on the
775+ # ' right-hand side of \code{formula} (use \code{labels(terms(formula))} to
776+ # ' see the ordering).
777+ # '
778+ # ' Arguments \code{num.subj.lthr.abs} and \code{num.subj.lthr.rel} are
779+ # ' mainly for input data with subject-specific masks, i.e. currently only
780+ # ' for volume data. For fixel-wise data, you may ignore these arguments.
761781# '
762782# ' @inheritParams ModelArray.lm
763783# '
@@ -785,54 +805,58 @@ ModelArray.gam <- function(formula, data, phenotypes, scalar,
785805# ' level for scalar writes. Default 4.
786806# ' @param ... Additional arguments forwarded to \code{FUN}.
787807# '
788- # ' @return If \code{flag_initiate = TRUE}, a list with one component:
789- # ' \describe{
790- # ' \item{column_names}{Character vector. The column names derived from
791- # ' the return value of \code{user_fun}, with \code{"element_id"}
792- # ' prepended. For unnamed list or atomic returns, columns are named
793- # ' \code{v1}, \code{v2}, etc. Set to \code{NaN} if the element was
794- # ' skipped or errored.}
795- # ' }
796- # ' If \code{flag_initiate = FALSE}, a numeric vector of length
797- # ' \code{num.stat.output} with \code{element_id} (0-based) first and
798- # ' the coerced output of \code{user_fun} in subsequent positions.
799- # ' All-\code{NaN} (except \code{element_id}) if the element was skipped
800- # ' or if an error occurred with \code{on_error = "skip"}.
808+ # ' @return A data.frame with one row per element. The first column is
809+ # ' \code{element_id} (0-based). Remaining columns contain the requested
810+ # ' statistics, named as \code{<term>.<statistic>} for per-term statistics
811+ # ' and \code{model.<statistic>} for model-level statistics. Smooth term
812+ # ' names are normalized (e.g. \code{s_age.statistic}). If p-value
813+ # ' corrections were requested, additional columns are appended with the
814+ # ' correction method as suffix (e.g. \code{s_age.p.value.fdr}). If
815+ # ' \code{changed.rsq.term.index} was requested, additional columns
816+ # ' \code{<term>.delta.adj.rsq} and \code{<term>.partial.rsq} are
817+ # ' appended.
801818# '
802819# ' @seealso \code{\link{ModelArray.lm}} for linear models,
803- # ' \code{\link{ModelArray.gam}} for GAMs,
804- # ' \code{\link{exampleElementData}} for building test data,
805- # ' \linkS4class{ModelArray} for the input class.
820+ # ' \code{\link{ModelArray.wrap}} for user-supplied functions,
821+ # ' \code{\link{gen_gamFormula_fxSmooth}} and
822+ # ' \code{\link{gen_gamFormula_contIx}} for formula helpers,
823+ # ' \linkS4class{ModelArray} for the input class,
824+ # ' \code{\link{ModelArray}} for the constructor,
825+ # ' \code{\link{exampleElementData}} for testing formulas on a single
826+ # ' element.
806827# '
807828# ' @examples{
808829# ' \dontrun{
809830# ' ma <- ModelArray("path/to/data.h5", scalar_types = c("FD"))
810831# ' phenotypes <- read.csv("cohort.csv")
811832# '
812- # ' # Simple custom function
813- # ' my_fun <- function(data, ...) {
814- # ' mod <- lm(FD ~ age + sex, data = data)
815- # ' tidy_out <- broom::tidy(mod)
816- # ' # Return a one-row tibble
817- # ' tibble::tibble(
818- # ' age_estimate = tidy_out$estimate[tidy_out$term == "age"],
819- # ' age_pvalue = tidy_out$p.value[tidy_out$term == "age"]
820- # ' )
821- # ' }
822- # '
833+ # ' # Fit GAM with default outputs
834+ # ' results <- ModelArray.gam(
835+ # ' FD ~ s(age, fx = TRUE) + sex,
836+ # ' data = ma,
837+ # ' phenotypes = phenotypes,
838+ # ' scalar = "FD"
839+ # ' )
840+ # ' head(results)
823841# '
824- # ' # Test on one element first
825- # ' test_df <- exampleElementData(ma, scalar = "FD",
826- # ' i_element = 1,
827- # ' phenotypes = phenotypes)
828- # ' my_fun(data = test_df)
842+ # ' # With changed R-squared for the smooth term (term index 1)
843+ # ' results_rsq <- ModelArray.gam(
844+ # ' FD ~ s(age, fx = TRUE) + sex,
845+ # ' data = ma,
846+ # ' phenotypes = phenotypes,
847+ # ' scalar = "FD",
848+ # ' changed.rsq.term.index = list(1)
849+ # ' )
829850# '
830- # ' # Run across all elements
831- # ' results <- ModelArray.wrap(
832- # ' FUN = my_fun,
851+ # ' # Full outputs, no p-value correction
852+ # ' results_full <- ModelArray.gam(
853+ # ' FD ~ s(age, fx = TRUE) + sex,
833854# ' data = ma,
834855# ' phenotypes = phenotypes,
835- # ' scalar = "FD"
856+ # ' scalar = "FD",
857+ # ' full.outputs = TRUE,
858+ # ' correct.p.value.smoothTerms = "none",
859+ # ' correct.p.value.parametricTerms = "none"
836860# ' )
837861# ' }
838862# ' }
0 commit comments