# Microbiome
processedMicrobiome = CMTFtoolbox::Georgiou2025$Tooth_microbiome
# Remove samples due to low library size
mask = rowSums(processedMicrobiome$data) > 6500
processedMicrobiome$data = processedMicrobiome$data[mask,]
processedMicrobiome$mode1 = processedMicrobiome$mode1[mask,]
# Remove duplicate samples
mask = !(processedMicrobiome$mode1$SampleID %in% c("A11-8 36", "A11-10 17", "A11-15 17"))
processedMicrobiome$data = processedMicrobiome$data[mask,]
processedMicrobiome$mode1 = processedMicrobiome$mode1[mask,]
# Also remove subject A11-8 due to being an outlier
processedMicrobiome$data = processedMicrobiome$data[-23,,]
processedMicrobiome$mode1 = processedMicrobiome$mode1[-23,]
# CLR transformation
df = processedMicrobiome$data + 1
geomeans = pracma::geomean(as.matrix(df), dim=2)
df_clr = log(sweep(df, 1, geomeans, FUN="/"))
# Feature filtering
sparsityThreshold = 0.5
maskA = processedMicrobiome$mode1$PainS_NopainA == "A"
maskS = processedMicrobiome$mode1$PainS_NopainA == "S"
dfA = processedMicrobiome$data[maskA,]
dfS = processedMicrobiome$data[maskS,]
sparsityA = colSums(dfA == 0) / nrow(dfA)
sparsityS = colSums(dfS == 0) / nrow(dfS)
mask = (sparsityA <= sparsityThreshold) | (sparsityS <= sparsityThreshold)
processedMicrobiome$data = df_clr[,mask]
processedMicrobiome$mode2 = processedMicrobiome$mode2[mask,]
# Center and scale
processedMicrobiome$data = sweep(processedMicrobiome$data, 2, colMeans(processedMicrobiome$data), FUN="-")
processedMicrobiome$data = sweep(processedMicrobiome$data, 2, apply(processedMicrobiome$data, 2, sd), FUN="/")
# Cytokines
processedCytokines_case = CMTFtoolbox::Georgiou2025$Inflammatory_mediators
# Select only case subjects
mask = processedCytokines_case$mode1$case_control == "case"
processedCytokines_case$data = processedCytokines_case$data[mask,,]
processedCytokines_case$mode1 = processedCytokines_case$mode1[mask,]
# Select only samples with corresponding microbiome data
mask = processedCytokines_case$mode1$SubjectID %in% processedMicrobiome$mode1$SubjectID
processedCytokines_case$data = processedCytokines_case$data[mask,,]
processedCytokines_case$mode1 = processedCytokines_case$mode1[mask,]
processedCytokines_case$data = log(processedCytokines_case$data + 0.005)
processedCytokines_case$data = multiwayCenter(processedCytokines_case$data, 1)
processedCytokines_case$data = multiwayScale(processedCytokines_case$data, 2)
# Prep data
datasets = list(processedCytokines_case$data, as.matrix(processedMicrobiome$data))
modes = list(c(1,2,3),c(1,4))
Z = setupCMTFdata(datasets, modes, normalize=TRUE)
acmtf_model = CMTFtoolbox::acmtf_opt(Z, 2, nstart = 10, method="L-BFGS", numCores=10)
lambda = abs(acmtf_model$Fac[[5]])
colnames(lambda) = paste0("C", 1:2)
lambda %>%
as_tibble() %>%
mutate(block=c("cytokines", "microbiome")) %>%
mutate(block=factor(block, levels=c("cytokines", "microbiome"))) %>%
pivot_longer(-block) %>%
ggplot(aes(x=as.factor(name),y=value,fill=as.factor(block))) +
geom_bar(stat="identity",position=position_dodge(),col="black") +
xlab("ACMTF component number") +
ylab(expression(lambda)) +
scale_x_discrete(labels=1:3) +
scale_fill_manual(name="Dataset",values = hue_pal()(2),labels=c("Inflammatory mediators", "Tooth microbiome")) +
theme(legend.position="top", text=element_text(size=16))
df = processedCytokines_case$mode1 %>% mutate(V1=acmtf_model$Fac[[1]][,1],V2=acmtf_model$Fac[[1]][,2]) %>% mutate(Gender = as.numeric(as.factor(Gender)), PainS_NopainA = as.numeric(as.factor(PainS_NopainA)))
summary(lm(V1 ~ Gender + PainS_NopainA, data=df))
#>
#> Call:
#> lm(formula = V1 ~ Gender + PainS_NopainA, data = df)
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -0.21124 -0.09583 -0.03519 0.07407 0.31275
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) -0.64875 0.13502 -4.805 0.000108 ***
#> Gender 0.15494 0.06195 2.501 0.021190 *
#> PainS_NopainA 0.28024 0.06148 4.558 0.000191 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 0.1472 on 20 degrees of freedom
#> Multiple R-squared: 0.5671, Adjusted R-squared: 0.5238
#> F-statistic: 13.1 on 2 and 20 DF, p-value: 0.0002311
summary(lm(V2 ~ Gender + PainS_NopainA, data=df))
#>
#> Call:
#> lm(formula = V2 ~ Gender + PainS_NopainA, data = df)
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -0.40028 -0.14344 0.01718 0.13641 0.35876
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) -0.04116 0.19947 -0.206 0.839
#> Gender 0.08208 0.09152 0.897 0.380
#> PainS_NopainA -0.05142 0.09083 -0.566 0.578
#>
#> Residual standard error: 0.2174 on 20 degrees of freedom
#> Multiple R-squared: 0.05506, Adjusted R-squared: -0.03944
#> F-statistic: 0.5826 on 2 and 20 DF, p-value: 0.5676