Skip to contents
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)
library(parafac4microbiome)
library(NPLStoolbox)
library(CMTFtoolbox)
#> 
#> Attaching package: 'CMTFtoolbox'
#> The following object is masked from 'package:NPLStoolbox':
#> 
#>     npred
#> The following objects are masked from 'package:parafac4microbiome':
#> 
#>     fac_to_vect, reinflateFac, reinflateTensor, vect_to_fac
library(scales)

ACMTF

# Microbiome
processedMicrobiome = CMTFtoolbox::Georgiou2025$Tooth_microbiome

# Remove samples due to low library size
mask = rowSums(processedMicrobiome$data) > 6500
processedMicrobiome$data = processedMicrobiome$data[mask,]
processedMicrobiome$mode1 = processedMicrobiome$mode1[mask,]

# Remove duplicate samples
mask = !(processedMicrobiome$mode1$SampleID %in% c("A11-8 36", "A11-10 17", "A11-15 17"))
processedMicrobiome$data = processedMicrobiome$data[mask,]
processedMicrobiome$mode1 = processedMicrobiome$mode1[mask,]

# Also remove subject A11-8 due to being an outlier
processedMicrobiome$data = processedMicrobiome$data[-23,,]
processedMicrobiome$mode1 = processedMicrobiome$mode1[-23,]

# CLR transformation
df = processedMicrobiome$data + 1
geomeans = pracma::geomean(as.matrix(df), dim=2)
df_clr = log(sweep(df, 1, geomeans, FUN="/"))

# Feature filtering
sparsityThreshold = 0.5
maskA = processedMicrobiome$mode1$PainS_NopainA == "A"
maskS = processedMicrobiome$mode1$PainS_NopainA == "S"

dfA = processedMicrobiome$data[maskA,]
dfS = processedMicrobiome$data[maskS,]

sparsityA = colSums(dfA == 0) / nrow(dfA)
sparsityS = colSums(dfS == 0) / nrow(dfS)

mask = (sparsityA <= sparsityThreshold) | (sparsityS <= sparsityThreshold)

processedMicrobiome$data = df_clr[,mask]
processedMicrobiome$mode2 = processedMicrobiome$mode2[mask,]

# Center and scale
processedMicrobiome$data = sweep(processedMicrobiome$data, 2, colMeans(processedMicrobiome$data), FUN="-")
processedMicrobiome$data = sweep(processedMicrobiome$data, 2, apply(processedMicrobiome$data, 2, sd), FUN="/")

# Cytokines
processedCytokines_case = CMTFtoolbox::Georgiou2025$Inflammatory_mediators

# Select only case subjects
mask = processedCytokines_case$mode1$case_control == "case"
processedCytokines_case$data = processedCytokines_case$data[mask,,]
processedCytokines_case$mode1 = processedCytokines_case$mode1[mask,]

# Select only samples with corresponding microbiome data
mask = processedCytokines_case$mode1$SubjectID %in% processedMicrobiome$mode1$SubjectID
processedCytokines_case$data = processedCytokines_case$data[mask,,]
processedCytokines_case$mode1 = processedCytokines_case$mode1[mask,]

processedCytokines_case$data = log(processedCytokines_case$data + 0.005)
processedCytokines_case$data = multiwayCenter(processedCytokines_case$data, 1)
processedCytokines_case$data = multiwayScale(processedCytokines_case$data, 2)

# Prep data
datasets = list(processedCytokines_case$data, as.matrix(processedMicrobiome$data))
modes = list(c(1,2,3),c(1,4))
Z = setupCMTFdata(datasets, modes, normalize=TRUE)
# Too computationally intensive.
acmtf_model = CMTFtoolbox::acmtf_opt(Z, 2, nstart = 10, method="L-BFGS", numCores=10)
lambda = abs(acmtf_model$Fac[[5]])
colnames(lambda) = paste0("C", 1:2)

lambda %>%
  as_tibble() %>%
  mutate(block=c("cytokines", "microbiome")) %>%
  mutate(block=factor(block, levels=c("cytokines", "microbiome"))) %>%
  pivot_longer(-block) %>%
  ggplot(aes(x=as.factor(name),y=value,fill=as.factor(block))) +
  geom_bar(stat="identity",position=position_dodge(),col="black") +
  xlab("ACMTF component number") +
  ylab(expression(lambda)) +
  scale_x_discrete(labels=1:3) +
  scale_fill_manual(name="Dataset",values = hue_pal()(2),labels=c("Inflammatory mediators", "Tooth microbiome")) +
  theme(legend.position="top", text=element_text(size=16))

df = processedCytokines_case$mode1 %>% mutate(V1=acmtf_model$Fac[[1]][,1],V2=acmtf_model$Fac[[1]][,2]) %>% mutate(Gender = as.numeric(as.factor(Gender)), PainS_NopainA = as.numeric(as.factor(PainS_NopainA)))
summary(lm(V1 ~ Gender + PainS_NopainA, data=df))
#> 
#> Call:
#> lm(formula = V1 ~ Gender + PainS_NopainA, data = df)
#> 
#> Residuals:
#>      Min       1Q   Median       3Q      Max 
#> -0.21124 -0.09583 -0.03519  0.07407  0.31275 
#> 
#> Coefficients:
#>               Estimate Std. Error t value Pr(>|t|)    
#> (Intercept)   -0.64875    0.13502  -4.805 0.000108 ***
#> Gender         0.15494    0.06195   2.501 0.021190 *  
#> PainS_NopainA  0.28024    0.06148   4.558 0.000191 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Residual standard error: 0.1472 on 20 degrees of freedom
#> Multiple R-squared:  0.5671, Adjusted R-squared:  0.5238 
#> F-statistic:  13.1 on 2 and 20 DF,  p-value: 0.0002311
summary(lm(V2 ~ Gender + PainS_NopainA, data=df))
#> 
#> Call:
#> lm(formula = V2 ~ Gender + PainS_NopainA, data = df)
#> 
#> Residuals:
#>      Min       1Q   Median       3Q      Max 
#> -0.40028 -0.14344  0.01718  0.13641  0.35876 
#> 
#> Coefficients:
#>               Estimate Std. Error t value Pr(>|t|)
#> (Intercept)   -0.04116    0.19947  -0.206    0.839
#> Gender         0.08208    0.09152   0.897    0.380
#> PainS_NopainA -0.05142    0.09083  -0.566    0.578
#> 
#> Residual standard error: 0.2174 on 20 degrees of freedom
#> Multiple R-squared:  0.05506,    Adjusted R-squared:  -0.03944 
#> F-statistic: 0.5826 on 2 and 20 DF,  p-value: 0.5676

ACMTF-R

Y = as.numeric(as.factor(processedCytokines_case$mode1$PainS_NopainA))
Ycnt = Y - mean(Y)
Ynorm = Ycnt / norm(Ycnt, "2")
Ynorm = as.matrix(Ynorm)
# Too computationally intensive.
acmtfr_model90 = CMTFtoolbox::acmtfr_opt(Z, Ynorm, 1, pi=0.90, nstart = 10, method="L-BFGS", numCores=10)
lambda = abs(acmtfr_model90$Fac[[5]])
colnames(lambda) = paste0("C", 1)

lambda %>%
  as_tibble() %>%
  mutate(block=c("cytokines", "microbiome")) %>%
  mutate(block=factor(block, levels=c("cytokines", "microbiome"))) %>%
  pivot_longer(-block) %>%
  ggplot(aes(x=as.factor(name),y=value,fill=as.factor(block))) +
  geom_bar(stat="identity",position=position_dodge(),col="black") +
  xlab("ACMTF-R component number") +
  ylab(expression(lambda)) +
  scale_x_discrete(labels=1:3) +
  scale_fill_manual(name="Dataset",values = hue_pal()(2),labels=c("Inflammatory mediators", "Tooth microbiome")) +
  theme(legend.position="top", text=element_text(size=16))

df = processedCytokines_case$mode1 %>% mutate(V1=acmtfr_model90$Fac[[1]][,1],) %>% mutate(Gender = as.numeric(as.factor(Gender)), PainS_NopainA = as.numeric(as.factor(PainS_NopainA)))
summary(lm(V1 ~ Gender + PainS_NopainA, data=df))
#> 
#> Call:
#> lm(formula = V1 ~ Gender + PainS_NopainA, data = df)
#> 
#> Residuals:
#>       Min        1Q    Median        3Q       Max 
#> -0.192779 -0.058594 -0.001544  0.046445  0.210357 
#> 
#> Coefficients:
#>               Estimate Std. Error t value Pr(>|t|)    
#> (Intercept)   -0.69489    0.09292  -7.478 3.25e-07 ***
#> Gender         0.10032    0.04264   2.353    0.029 *  
#> PainS_NopainA  0.36254    0.04231   8.568 3.97e-08 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Residual standard error: 0.1013 on 20 degrees of freedom
#> Multiple R-squared:  0.7949, Adjusted R-squared:  0.7744 
#> F-statistic: 38.76 on 2 and 20 DF,  p-value: 1.316e-07