ACSM Cancer Report

American College of Sports Medicine Cancer Insights

Analysis template for the ACSM cancer report.

Summary tables:

Code
library(dplyr)
library(tidyr)
library(ggplot2)

# Grip-strength display table: one row per activity group, one column per
# cancer label, every cell rendered as "mean ± SD (n=count)" of grip_dom.
grip_summary <- combined %>%
  mutate(
    # Rows matching none of the conditions below (for example NHANES rows whose
    # meets_ACSM_guidelines is NA) receive NA from case_when() and are removed
    # by the filter() that follows.
    activity_group = case_when(
      dataset == "SG25" ~ "SG_Athletes",
      dataset == "NHANES" & meets_ACSM_guidelines ~ "NHANES_Active",
      dataset == "NHANES" & !meets_ACSM_guidelines ~ "NHANES_Inactive"
    ),
    # c() coerces the candidates to character, so TRUE, 1, "Yes" and "Y" all
    # count as cancer. %in% never returns NA, so every other value -- including
    # a missing one -- is labelled "CancerFALSE".
    cancer = if_else(
      cancer %in% c(TRUE, 1, "Yes", "Y"),
      "CancerTRUE",
      "CancerFALSE"
    )
  ) %>%
  filter(!is.na(activity_group) & is.finite(grip_dom)) %>%
  group_by(activity_group, cancer) %>%
  summarise(
    summary = sprintf(
      "%.2f ± %.2f (n=%d)",
      mean(grip_dom, na.rm = TRUE),
      sd(grip_dom, na.rm = TRUE),
      n()
    ),
    .groups = "drop"
  ) %>%
  # Spread the cancer labels into columns; activity_group identifies the rows.
  pivot_wider(
    id_cols = activity_group,
    names_from = cancer,
    values_from = summary
  ) %>%
  # Fixed presentation order: inactive, active, athletes.
  arrange(
    match(
      activity_group,
      c("NHANES_Inactive", "NHANES_Active", "SG_Athletes")
    )
  )

grip_summary
# A tibble: 3 × 3
  activity_group  CancerFALSE            CancerTRUE           
  <chr>           <chr>                  <chr>                
1 NHANES_Inactive 31.02 ± 10.26 (n=1772) 29.26 ± 10.49 (n=349)
2 NHANES_Active   35.23 ± 10.41 (n=1902) 34.13 ± 10.83 (n=335)
3 SG_Athletes     39.35 ± 12.34 (n=89)   37.32 ± 9.47 (n=26)  
Code
library(dplyr)
library(tidyr)
library(broom)

# 1) Prepare data with consistent labels
#
# activity_group is derived from dataset and meets_ACSM_guidelines; cancer is
# recoded to the two strings "CancerTRUE" / "CancerFALSE". Rows without an
# activity group or without a finite grip_dom are dropped.
#
# The conditions must be vectorised: isTRUE() and identical() test the whole
# column as a single object and return one FALSE, which meant no row was ever
# labelled "NHANES_Active". case_when() treats an NA condition as "no match",
# so NHANES rows with a missing meets_ACSM_guidelines still fall through to
# the NA default and are filtered out below.
df <- combined %>%
  mutate(
    activity_group = case_when(
      dataset == "SG25" ~ "SG_Athletes",
      dataset == "NHANES" & meets_ACSM_guidelines ~ "NHANES_Active",
      dataset == "NHANES" & !meets_ACSM_guidelines ~ "NHANES_Inactive",
      TRUE ~ NA_character_
    ),
    # %in% never returns NA, so a missing cancer value becomes "CancerFALSE".
    cancer = if_else(cancer %in% c(TRUE, 1, "Yes", "Y"), "CancerTRUE", "CancerFALSE")
  ) %>%
  filter(!is.na(activity_group), is.finite(grip_dom))

# 2) Mean ± SD (n) per activity_group × cancer, wide format with guaranteed columns
#
# values_fill alone only fills empty cells of columns that already exist; it
# cannot create a CancerTRUE / CancerFALSE column that no group contributes.
# complete() therefore adds every activity_group × cancer combination (with
# the "—" placeholder) before pivoting, so both columns are present whenever
# df contains at least one activity group.
summ_wide <- df %>%
  group_by(activity_group, cancer) %>%
  summarise(
    n = n(),
    mean_grip = mean(grip_dom, na.rm = TRUE),
    sd_grip   = sd(grip_dom, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(cell = sprintf("%.2f ± %.2f (n=%d)", mean_grip, sd_grip, n)) %>%
  select(activity_group, cancer, cell) %>%
  complete(
    activity_group,
    cancer = c("CancerFALSE", "CancerTRUE"),
    fill = list(cell = "—")
  ) %>%
  pivot_wider(
    names_from = cancer,
    values_from = cell
  )

# 3) Within-group t-tests (robust to groups missing a cancer level)
#
# One row per activity_group with the t statistic, degrees of freedom and
# p-value of t.test(grip_dom ~ cancer). A group is tested only when both cancer
# levels are present AND each level has at least two observations; t.test()
# with its default unequal-variance setting stops with "not enough
# observations" otherwise, which would abort the whole pipeline. Untested
# groups get NA in all three columns.
ttest_by_group <- df %>%
  group_by(activity_group) %>%
  summarise(
    ttest = list(
      if (n_distinct(cancer) == 2 && min(table(cancer)) >= 2) {
        # conf.level only affects the confidence interval, which transmute()
        # below discards; t, df and p_value do not depend on it.
        tidy(t.test(grip_dom ~ cancer, conf.level = 0.90))
      } else {
        tibble(statistic = NA_real_, parameter = NA_real_, p.value = NA_real_)
      }
    ),
    .groups = "drop"
  ) %>%
  unnest(ttest) %>%
  transmute(activity_group, t = statistic, df = parameter, p_value = p.value)

# 4) Final table: the formatted cells from summ_wide with the matching
#    per-group test results appended, shown in a fixed presentation order.
grip_summary_ttest <-
  left_join(summ_wide, ttest_by_group, by = "activity_group") %>%
  arrange(
    match(
      activity_group,
      c("NHANES_Inactive", "NHANES_Active", "SG_Athletes")
    )
  )

grip_summary_ttest
# A tibble: 2 × 6
  activity_group  CancerFALSE            CancerTRUE              t    df p_value
  <chr>           <chr>                  <chr>               <dbl> <dbl>   <dbl>
1 NHANES_Inactive 31.02 ± 10.26 (n=1772) 29.26 ± 10.49 (n=3… 2.88  488.  0.00419
2 SG_Athletes     39.35 ± 12.34 (n=89)   37.32 ± 9.47 (n=26) 0.893  52.3 0.376  
Code
library(gt)
# Render the combined table with gt. p-values are shown to three decimals;
# groups without a test result (NA p-value) display an em dash instead.
grip_summary_ttest %>%
  mutate(
    p_value = ifelse(!is.na(p_value), sprintf("%.3f", p_value), "—")
  ) %>%
  gt() %>%
  tab_header(
    subtitle = "Mean ± SD (n) with within-group Welch t-tests",
    title = "Grip Strength by Cancer History within Activity Groups"
  )
Grip Strength by Cancer History within Activity Groups
Mean ± SD (n) with within-group Welch t-tests
activity_group CancerFALSE CancerTRUE t df p_value
NHANES_Inactive 31.02 ± 10.26 (n=1772) 29.26 ± 10.49 (n=349) 2.8768630 488.02746 0.004
SG_Athletes 39.35 ± 12.34 (n=89) 37.32 ± 9.47 (n=26) 0.8928796 52.29828 0.376
Code
library(dplyr)
library(broom)

# Rebuild the analysis data frame with harmonised labels: activity_group is
# derived from dataset / meets_ACSM_guidelines and cancer is recoded to
# "CancerTRUE" / "CancerFALSE".
df <- combined %>%
  mutate(
    # Anything matching none of the conditions (including NHANES rows with a
    # missing meets_ACSM_guidelines) gets NA from case_when().
    activity_group = case_when(
      dataset == "SG25" ~ "SG_Athletes",
      dataset == "NHANES" & meets_ACSM_guidelines ~ "NHANES_Active",
      dataset == "NHANES" & !meets_ACSM_guidelines ~ "NHANES_Inactive"
    ),
    # %in% never yields NA, so a missing cancer value is labelled
    # "CancerFALSE".
    cancer = if_else(
      cancer %in% c(TRUE, 1, "Yes", "Y"),
      "CancerTRUE",
      "CancerFALSE"
    )
  ) %>%
  # Keep only rows with an assigned group and a finite grip measurement.
  filter(!is.na(activity_group) & is.finite(grip_dom))

# Activity groups present in df, in order of first appearance.
groups <- unique(df$activity_group)

# Print a banner and the full t.test() report for every activity group.
for (g in groups) {
  cat("\n-------------------------------\n")
  cat("Activity Group:", g, "\n")
  cat("-------------------------------\n")

  sub <- filter(df, activity_group == g)

  # A two-sample test needs both cancer levels; otherwise report and move on.
  if (n_distinct(sub$cancer) != 2) {
    cat("⚠️  Skipping:", g, "– only one cancer level present.\n")
    next
  }

  # The report includes a 90% confidence interval; the trailing 0.95 note in
  # the original presumably records the conventional alternative.
  t_res <- t.test(grip_dom ~ cancer, data = sub, conf.level = 0.90)
  print(t_res)
}

-------------------------------
Activity Group: NHANES_Inactive 
-------------------------------

    Welch Two Sample t-test

data:  grip_dom by cancer
t = 2.8769, df = 488.03, p-value = 0.004192
alternative hypothesis: true difference in means between group CancerFALSE and group CancerTRUE is not equal to 0
90 percent confidence interval:
 0.7520097 2.7689638
sample estimates:
mean in group CancerFALSE  mean in group CancerTRUE 
                 31.02410                  29.26361 


-------------------------------
Activity Group: NHANES_Active 
-------------------------------

    Welch Two Sample t-test

data:  grip_dom by cancer
t = 1.7362, df = 449.51, p-value = 0.08322
alternative hypothesis: true difference in means between group CancerFALSE and group CancerTRUE is not equal to 0
90 percent confidence interval:
 0.05610361 2.15971163
sample estimates:
mean in group CancerFALSE  mean in group CancerTRUE 
                 35.23328                  34.12537 


-------------------------------
Activity Group: SG_Athletes 
-------------------------------

    Welch Two Sample t-test

data:  grip_dom by cancer
t = 0.89288, df = 52.298, p-value = 0.376
alternative hypothesis: true difference in means between group CancerFALSE and group CancerTRUE is not equal to 0
90 percent confidence interval:
 -1.775298  5.831218
sample estimates:
mean in group CancerFALSE  mean in group CancerTRUE 
                 39.34719                  37.31923 

Bayesian ANOVA:

Code
library(brms)

# Fit -- or reuse -- the two-way model grip_dom ~ activity_group * cancer.
#
# The fitted object is cached as an .rds file under analysis/artifacts/models
# (resolved with here::here()). If the file exists it is loaded as-is;
# otherwise the model is fitted with brms and then saved.
# NOTE(review): the cache check is file existence only, so a change to df, the
# formula or the priors does not trigger a refit -- delete the file to refit.
model_dir <- here::here("analysis", "artifacts", "models")
dir.create(model_dir, showWarnings = FALSE, recursive = TRUE)

model_file <- file.path(model_dir, "anova_grip_activity_cancer.rds")

if (!file.exists(model_file)) {
  message("🚀 Model not found — fitting new model now.")

  anova_fit <- brm(
    grip_dom ~ activity_group * cancer,
    data = df,
    family = student(),
    prior = c(
      prior(normal(0, 5), class = "b"),
      prior(student_t(3, 0, 10), class = "Intercept"),
      prior(exponential(1), class = "sigma")
    ),
    chains = 4,
    iter = 4000,
    cores = 4,
    seed = 42,
    backend = "cmdstanr"
  )

  # Persist the fit so later renders can skip sampling.
  saveRDS(anova_fit, file = model_file)
  message("💾 Model saved to ", model_file)
} else {
  message("✅ Model already exists — loading from file.")
  anova_fit <- readRDS(model_file)
}
Code
library(emmeans)

# Marginal means for every activity_group × cancer cell of the fitted model,
# summarised with 90% intervals (0.95 being the conventional alternative).
# The stray empty argument (", ,") in the original call has been removed.
emm <- emmeans(anova_fit, ~ activity_group * cancer, level = 0.90)
summary(emm)
 activity_group  cancer      emmean lower.HPD upper.HPD
 NHANES_Active   CancerFALSE   35.2      34.8      35.6
 NHANES_Inactive CancerFALSE   31.0      30.6      31.4
 SG_Athletes     CancerFALSE   38.9      37.1      40.7
 NHANES_Active   CancerTRUE    34.1      33.2      35.0
 NHANES_Inactive CancerTRUE    29.2      28.3      30.1
 SG_Athletes     CancerTRUE    37.3      34.2      40.5

Point estimate displayed: median 
HPD interval probability: 0.9 
Code
# All pairwise differences between the marginal means, without multiplicity
# adjustment, with 90% intervals (0.95 being the conventional alternative).
contrast(emm, method = "pairwise", adjust = "none", level = 0.90)
 contrast                                                 estimate lower.HPD
 NHANES_Active CancerFALSE - NHANES_Inactive CancerFALSE      4.21     3.658
 NHANES_Active CancerFALSE - SG_Athletes CancerFALSE         -3.74    -5.546
 NHANES_Active CancerFALSE - NHANES_Active CancerTRUE         1.09     0.113
 NHANES_Active CancerFALSE - NHANES_Inactive CancerTRUE       5.97     4.975
 NHANES_Active CancerFALSE - SG_Athletes CancerTRUE          -2.14    -5.388
 NHANES_Inactive CancerFALSE - SG_Athletes CancerFALSE       -7.95    -9.817
 NHANES_Inactive CancerFALSE - NHANES_Active CancerTRUE      -3.12    -4.176
 NHANES_Inactive CancerFALSE - NHANES_Inactive CancerTRUE     1.76     0.758
 NHANES_Inactive CancerFALSE - SG_Athletes CancerTRUE        -6.34    -9.489
 SG_Athletes CancerFALSE - NHANES_Active CancerTRUE           4.84     2.745
 SG_Athletes CancerFALSE - NHANES_Inactive CancerTRUE         9.73     7.633
 SG_Athletes CancerFALSE - SG_Athletes CancerTRUE             1.59    -1.872
 NHANES_Active CancerTRUE - NHANES_Inactive CancerTRUE        4.90     3.563
 NHANES_Active CancerTRUE - SG_Athletes CancerTRUE           -3.23    -6.586
 NHANES_Inactive CancerTRUE - SG_Athletes CancerTRUE         -8.15   -11.287
 upper.HPD
    4.7634
   -1.9111
    2.0884
    6.9220
    0.9575
   -6.1238
   -2.1537
    2.7385
   -3.1450
    6.8482
   11.6743
    5.1239
    6.1790
   -0.0186
   -4.7084

Point estimate displayed: median 
HPD interval probability: 0.9 

References

List citations inline with @citation-key. The bibliography renders automatically below.