Skip to contents

Preamble

library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(tidyr)
library(parafac4microbiome)
library(NPLStoolbox)
library(CMTFtoolbox)
#> 
#> Attaching package: 'CMTFtoolbox'
#> The following object is masked from 'package:NPLStoolbox':
#> 
#>     npred
#> The following objects are masked from 'package:parafac4microbiome':
#> 
#>     fac_to_vect, reinflateFac, reinflateTensor, vect_to_fac
library(ggplot2)
library(ggpubr)
library(scales)
library(ggpattern)
rf_data = read.csv("./TIFN2/RFdata.csv")
colnames(rf_data) = c("subject", "id", "fotonr", "day", "group", "RFgroup", "MQH", "SPS(tm)", "Area_delta_R30", "Area_delta_Rmax", "Area_delta_R30_x_Rmax", "gingiva_mean_R_over_G", "gingiva_mean_R_over_G_upper_jaw", "gingiva_mean_R_over_G_lower_jaw")
rf_data = rf_data %>% as_tibble()

rf_data[rf_data$subject == "VSTPHZ", 1] = "VSTPH2"
rf_data[rf_data$subject == "D2VZH0", 1] = "DZVZH0"
rf_data[rf_data$subject == "DLODNN", 1] = "DLODDN"
rf_data[rf_data$subject == "O3VQFX", 1] = "O3VQFQ"
rf_data[rf_data$subject == "F80LGT", 1] = "F80LGF"
rf_data[rf_data$subject == "26QQR0", 1] = "26QQrO"

rf_data2 = read.csv("./TIFN2/red_fluorescence_data.csv") %>% as_tibble()
rf_data2 = rf_data2[,c(2,4,181:192)]
rf_data = rf_data %>% left_join(rf_data2)
#> Joining with `by = join_by(id, day)`

rf = rf_data %>% select(subject, RFgroup) %>% unique()
age_gender = read.csv("./TIFN2/Ploeg_subjectMetadata.csv", sep=";")
age_gender = age_gender[2:nrow(age_gender),2:ncol(age_gender)]
age_gender = age_gender %>% as_tibble() %>% filter(onderzoeksgroep == 0) %>% select(naam, leeftijd, geslacht)
colnames(age_gender) = c("subject", "age", "gender")

# Correction for incorrect subject ids
age_gender[age_gender$subject == "VSTPHZ", 1] = "VSTPH2"
age_gender[age_gender$subject == "D2VZH0", 1] = "DZVZH0"
age_gender[age_gender$subject == "DLODNN", 1] = "DLODDN"
age_gender[age_gender$subject == "O3VQFX", 1] = "O3VQFQ"
age_gender[age_gender$subject == "F80LGT", 1] = "F80LGF"
age_gender[age_gender$subject == "26QQR0", 1] = "26QQrO"

age_gender = age_gender %>% arrange(subject)

mapping = c(-14,0,2,5,9,14,21)

testMetadata = function(model, comp, metadata){
  transformedSubjectLoadings = transformPARAFACloadings(model$Fac, 1)[,comp]
  transformedSubjectLoadings = transformedSubjectLoadings / norm(transformedSubjectLoadings, "2")
  
  metadata = metadata$mode1 %>% left_join(vanderPloeg2024$red_fluorescence %>% select(subject,day,Area_delta_R30,plaquepercent,bomppercent) %>% filter(day==14), by="subject")
  
  result = lm(transformedSubjectLoadings ~ plaquepercent + bomppercent + Area_delta_R30 + gender + age, data=metadata)
  
  # Extract coefficients and confidence intervals
  coef_estimates <- summary(result)$coefficients
  conf_intervals <- confint(result)
  
  # Remove intercept
  coef_estimates <- coef_estimates[rownames(coef_estimates) != "(Intercept)", ]
  conf_intervals <- conf_intervals[rownames(conf_intervals) != "(Intercept)", ]
  
  # Combine into a clean data frame
  summary_table <- data.frame(
    Term     = rownames(coef_estimates),
    Estimate = coef_estimates[, "Estimate"] * 1e3,
    CI       = paste0(
                 conf_intervals[, 1], " – ",
                 conf_intervals[, 2]
               ),
    P_value  = coef_estimates[, "Pr(>|t|)"],
    P_adjust = p.adjust(coef_estimates[, "Pr(>|t|)"], "BH"),
    row.names = NULL
  )
  
  return(summary_table)
}

testFeatures = function(model, metadata, componentNum, metadataVar){
  df = metadata$mode1 %>% left_join(rf_data %>% filter(day==14)) %>% left_join(age_gender)
  topIndices = metadata$mode2 %>% mutate(index=1:nrow(.), Comp = model$Fac[[2]][,componentNum]) %>% arrange(desc(Comp)) %>% head() %>% select(index) %>% pull()
  bottomIndices = metadata$mode2 %>% mutate(index=1:nrow(.), Comp = model$Fac[[2]][,componentNum]) %>% arrange(desc(Comp)) %>% tail() %>% select(index) %>% pull()
  
  timepoint = which(abs(model$Fac[[3]][,componentNum]) == max(abs(model$Fac[[3]][,componentNum])))
  
  Xhat = parafac4microbiome::reinflateTensor(model$Fac[[1]][,componentNum], model$Fac[[2]][,componentNum], model$Fac[[3]][,componentNum])
  y = df[metadataVar]
  
  print("Positive loadings:")
  print(cor(Xhat[,topIndices[1],timepoint], y))
  print("Negative loadings:")
  print(cor(Xhat[,bottomIndices[1],timepoint], y))
}

plotFeatures = function(mode2, model, componentNum, flip=FALSE){
  
  if(flip==TRUE){
    df = mode2 %>% mutate(Comp = -1*model$Fac[[2]][,componentNum]) %>% arrange(Comp) %>% filter(Species != "") %>% mutate(index=1:nrow(.), name = paste(Genus, Species))
  } else{
    df = mode2 %>% mutate(Comp = model$Fac[[2]][,componentNum]) %>% arrange(Comp) %>% filter(Species != "") %>% mutate(index=1:nrow(.), name = paste(Genus, Species))
  }
  df = rbind(df %>% head(n=10), df %>% tail(n=10))
  
  plot=df %>%
    ggplot(aes(x=Comp,y=as.factor(index),fill=as.factor(Phylum))) +
    geom_bar(stat="identity", col="black") +
    scale_y_discrete(label=df$name) +
    xlab("Loading") +
    ylab("") +
    guides(fill=guide_legend(title="Phylum")) +
    theme(text=element_text(size=16))
  
  return(plot)
}
phylum_colors = hue_pal()(7)
processedTongue = processDataCube(parafac4microbiome::vanderPloeg2024$tongue, sparsityThreshold=0.50, considerGroups=TRUE, groupVariable="RFgroup", CLR=TRUE, centerMode=1, scaleMode=2)
processedLowling = processDataCube(parafac4microbiome::vanderPloeg2024$lower_jaw_lingual, sparsityThreshold=0.50, considerGroups=TRUE, groupVariable="RFgroup", CLR=TRUE, centerMode=1, scaleMode=2)
processedLowinter = processDataCube(parafac4microbiome::vanderPloeg2024$lower_jaw_interproximal, sparsityThreshold=0.50, considerGroups=TRUE, groupVariable="RFgroup", CLR=TRUE, centerMode=1, scaleMode=2)
processedUpling = processDataCube(parafac4microbiome::vanderPloeg2024$upper_jaw_lingual, sparsityThreshold=0.50, considerGroups=TRUE, groupVariable="RFgroup", CLR=TRUE, centerMode=1, scaleMode=2)
processedUpinter = processDataCube(parafac4microbiome::vanderPloeg2024$upper_jaw_interproximal, sparsityThreshold=0.50, considerGroups=TRUE, groupVariable="RFgroup", CLR=TRUE, centerMode=1, scaleMode=2)
processedSaliva = processDataCube(parafac4microbiome::vanderPloeg2024$saliva, sparsityThreshold=0.50, considerGroups=TRUE, groupVariable="RFgroup", CLR=TRUE, centerMode=1, scaleMode=2)
processedMetabolomics = parafac4microbiome::vanderPloeg2024$metabolomics
Y = processedTongue$mode1 %>% left_join(vanderPloeg2024$red_fluorescence %>% select(subject,Area_delta_R30,day) %>% filter(day==14)) %>% select(Area_delta_R30) %>% pull()
#> Joining with `by = join_by(subject)`
Ycnt = Y - mean(Y)

Ymetab = processedMetabolomics$mode1 %>% left_join(vanderPloeg2024$red_fluorescenc %>% select(subject,Area_delta_R30,day) %>% filter(day==14)) %>% select(Area_delta_R30) %>% pull()
#> Joining with `by = join_by(subject)`
Ymetab_cnt = Ymetab - mean(Ymetab)
# CV_tongue = NPLStoolbox::ncrossreg(processedTongue$data, Ycnt, maxNumComponents = 10)
# CV_lowling = NPLStoolbox::ncrossreg(processedLowling$data, Ycnt, maxNumComponents = 10)
# CV_lowinter = NPLStoolbox::ncrossreg(processedLowinter$data, Ycnt, maxNumComponents = 10)
# CV_upling = NPLStoolbox::ncrossreg(processedUpling$data, Ycnt, maxNumComponents = 10)
# CV_upinter = NPLStoolbox::ncrossreg(processedUpinter$data, Ycnt, maxNumComponents = 10)
# CV_saliva = NPLStoolbox::ncrossreg(processedSaliva$data, Ycnt, maxNumComponents = 10)
# CV_metab = NPLStoolbox::ncrossreg(processedMetabolomics$data, Ymetab_cnt, maxNumComponents = 10)
# 
# saveRDS(CV_tongue, "./NPLS_CV_tongue.RDS")
# saveRDS(CV_lowling, "./NPLS_CV_lowling.RDS")
# saveRDS(CV_lowinter, "./NPLS_CV_lowinter.RDS")
# saveRDS(CV_upling, "./NPLS_CV_upling.RDS")
# saveRDS(CV_upinter, "./NPLS_CV_upinter.RDS")
# saveRDS(CV_saliva, "./NPLS_CV_saliva.RDS")
# saveRDS(CV_metab, "./NPLS_CV_metab.RDS")
# 
# CV_tongue = readRDS("./NPLS_CV_tongue.RDS")
# CV_lowling = readRDS("./NPLS_CV_lowling.RDS")
# CV_lowinter = readRDS("./NPLS_CV_lowinter.RDS")
# CV_upling = readRDS("./NPLS_CV_upling.RDS")
# CV_upinter = readRDS("./NPLS_CV_upinter.RDS")
# CV_saliva = readRDS("./NPLS_CV_saliva.RDS")
# CV_metab = readRDS("./NPLS_CV_metab.RDS")
# 
# a=CV_tongue$varExp %>% pivot_longer(-numComponents) %>% ggplot(aes(x=as.factor(numComponents),y=value,col=as.factor(name),group=as.factor(name))) + geom_line() + geom_point() + xlab("Number of components") + ylab("Variance explained (%)") + theme(legend.position="none")
# b=CV_tongue$RMSE %>% ggplot(aes(x=as.factor(numComponents),y=RMSE,group=1)) + geom_line() + geom_point() + xlab("Number of components") + ylab("RMSECV")
# ggarrange(a,b)
# 
# a=CV_lowling$varExp %>% pivot_longer(-numComponents) %>% ggplot(aes(x=as.factor(numComponents),y=value,col=as.factor(name),group=as.factor(name))) + geom_line() + geom_point() + xlab("Number of components") + ylab("Variance explained (%)") + theme(legend.position="none")
# b=CV_lowling$RMSE %>% ggplot(aes(x=as.factor(numComponents),y=RMSE,group=1)) + geom_line() + geom_point() + xlab("Number of components") + ylab("RMSECV")
# ggarrange(a,b)
# 
# a=CV_lowinter$varExp %>% pivot_longer(-numComponents) %>% ggplot(aes(x=as.factor(numComponents),y=value,col=as.factor(name),group=as.factor(name))) + geom_line() + geom_point() + xlab("Number of components") + ylab("Variance explained (%)") + theme(legend.position="none")
# b=CV_lowinter$RMSE %>% ggplot(aes(x=as.factor(numComponents),y=RMSE,group=1)) + geom_line() + geom_point() + xlab("Number of components") + ylab("RMSECV")
# ggarrange(a,b)
# 
# a=CV_upling$varExp %>% pivot_longer(-numComponents) %>% ggplot(aes(x=as.factor(numComponents),y=value,col=as.factor(name),group=as.factor(name))) + geom_line() + geom_point() + xlab("Number of components") + ylab("Variance explained (%)") + theme(legend.position="none")
# b=CV_upling$RMSE %>% ggplot(aes(x=as.factor(numComponents),y=RMSE,group=1)) + geom_line() + geom_point() + xlab("Number of components") + ylab("RMSECV")
# ggarrange(a,b)
# 
# a=CV_upinter$varExp %>% pivot_longer(-numComponents) %>% ggplot(aes(x=as.factor(numComponents),y=value,col=as.factor(name),group=as.factor(name))) + geom_line() + geom_point() + xlab("Number of components") + ylab("Variance explained (%)") + theme(legend.position="none")
# b=CV_upinter$RMSE %>% ggplot(aes(x=as.factor(numComponents),y=RMSE,group=1)) + geom_line() + geom_point() + xlab("Number of components") + ylab("RMSECV")
# ggarrange(a,b)
# 
# a=CV_saliva$varExp %>% pivot_longer(-numComponents) %>% ggplot(aes(x=as.factor(numComponents),y=value,col=as.factor(name),group=as.factor(name))) + geom_line() + geom_point() + xlab("Number of components") + ylab("Variance explained (%)") + theme(legend.position="none")
# b=CV_saliva$RMSE %>% ggplot(aes(x=as.factor(numComponents),y=RMSE,group=1)) + geom_line() + geom_point() + xlab("Number of components") + ylab("RMSECV")
# ggarrange(a,b)
# 
# a=CV_metab$varExp %>% pivot_longer(-numComponents) %>% ggplot(aes(x=as.factor(numComponents),y=value,col=as.factor(name),group=as.factor(name))) + geom_line() + geom_point() + xlab("Number of components") + ylab("Variance explained (%)") + theme(legend.position="none")
# b=CV_metab$RMSE %>% ggplot(aes(x=as.factor(numComponents),y=RMSE,group=1)) + geom_line() + geom_point() + xlab("Number of components") + ylab("RMSECV")
# ggarrange(a,b)
# npls_tongue = NPLStoolbox::triPLS1(processedTongue$data, Ycnt, 3)
# npls_lowling = NPLStoolbox::triPLS1(processedLowling$data, Ycnt, 2)
# npls_lowinter = NPLStoolbox::triPLS1(processedLowinter$data, Ycnt, 3)
# npls_upling = NPLStoolbox::triPLS1(processedUpling$data, Ycnt, 1)
# npls_upinter = NPLStoolbox::triPLS1(processedUpinter$data, Ycnt, 1)
# npls_saliva = NPLStoolbox::triPLS1(processedSaliva$data, Ycnt, 1)
# npls_metab = NPLStoolbox::triPLS1(processedMetabolomics$data, Ymetab_cnt, 1)
# 
# saveRDS(npls_tongue, "./NPLS_tongue.RDS")
# saveRDS(npls_lowling, "./NPLS_lowling.RDS")
# saveRDS(npls_lowinter, "./NPLS_lowinter.RDS")
# saveRDS(npls_upling, "./NPLS_upling.RDS")
# saveRDS(npls_upinter, "./NPLS_upinter.RDS")
# saveRDS(npls_saliva, "./NPLS_saliva.RDS")
# saveRDS(npls_metab, "./NPLS_metab.RDS")

npls_tongue = readRDS("./TIFN2/NPLS_tongue.RDS")
npls_lowling = readRDS("./TIFN2/NPLS_lowling.RDS")
npls_lowinter = readRDS("./TIFN2/NPLS_lowinter.RDS")
npls_upling = readRDS("./TIFN2/NPLS_upling.RDS")
npls_upinter = readRDS("./TIFN2/NPLS_upinter.RDS")
npls_saliva = readRDS("./TIFN2/NPLS_saliva.RDS")
npls_metab = readRDS("./TIFN2/NPLS_metab.RDS")

Tongue microbiome

testMetadata(npls_tongue, 1, processedTongue)
#>             Term  Estimate                                         CI
#> 1  plaquepercent -1.474762 -0.00708656930332159 – 0.00413704497828965
#> 2    bomppercent  1.631592  -0.0018744665656207 – 0.00513764997731347
#> 3 Area_delta_R30 11.077971   0.00400772030295017 – 0.0181482211785528
#> 4         gender 36.330007        -0.063223285837 – 0.135883300205861
#> 5            age  3.166008  -0.00570115787648339 – 0.0120331734187125
#>       P_value   P_adjust
#> 1 0.597056195 0.59705619
#> 2 0.351269153 0.59170540
#> 3 0.003072009 0.01536005
#> 4 0.463727425 0.59170540
#> 5 0.473364318 0.59170540
testMetadata(npls_tongue, 2, processedTongue)
#>             Term   Estimate                                          CI
#> 1  plaquepercent   5.382195  -0.000686132570085629 – 0.0114505233390118
#> 2    bomppercent  -1.872716  -0.00566399213852543 – 0.00191856046358201
#> 3 Area_delta_R30   1.020862   -0.00662455365668888 – 0.0086662777282295
#> 4         gender -21.544052     -0.129196011019746 – 0.0861079065423951
#> 5            age -10.473955 -0.0200624648453823 – -0.000885444742297401
#>      P_value  P_adjust
#> 1 0.08039408 0.2009852
#> 2 0.32284992 0.5380832
#> 3 0.78792781 0.7879278
#> 4 0.68701099 0.7879278
#> 5 0.03317110 0.1658555
testMetadata(npls_tongue, 3, processedTongue)
#>             Term    Estimate                                         CI
#> 1  plaquepercent  -7.5520385 -0.0125229141152452 – -0.00258116287242108
#> 2    bomppercent  -0.7879148 -0.00389354174770562 – 0.00231771218586553
#> 3 Area_delta_R30  -0.2122868 -0.00647503505852898 – 0.00605046150851769
#> 4         gender -29.3520691    -0.117535255934982 – 0.0588311176394589
#> 5            age  15.5190376   0.00766460204790137 – 0.0233734731629338
#>        P_value    P_adjust
#> 1 0.0039690081 0.009922520
#> 2 0.6097561177 0.762195147
#> 3 0.9455292860 0.945529286
#> 4 0.5036538142 0.762195147
#> 5 0.0003022874 0.001511437
testFeatures(npls_tongue, processedTongue, 1, "Area_delta_R30") # flip
#> Joining with `by = join_by(subject, RFgroup)`
#> Joining with `by = join_by(subject, age, gender)`
#> [1] "Positive loadings:"
#>      Area_delta_R30
#> [1,]     -0.5331209
#> [1] "Negative loadings:"
#>      Area_delta_R30
#> [1,]      0.5331209

a = processedTongue$mode1 %>%
  mutate(Component_1 = npls_tongue$Fac[[1]][,1]) %>%
  left_join(rf_data %>% select(subject,day,Area_delta_R30,plaquepercent,bomppercent) %>% filter(day==14), by="subject") %>%
  ggplot(aes(x=Area_delta_R30,y=Component_1)) +
  geom_point() +
  xlab("RF%") +
  ylab("Loading") +
  stat_cor() +
  theme(text=element_text(size=16))

b = plotFeatures(processedTongue$mode2, npls_tongue, 1, flip=TRUE) + scale_fill_manual(values=phylum_colors[-c(3,7)]) + theme(legend.position="none")

c = processedTongue$mode3 %>%
  mutate(Component_1 = -1*npls_tongue$Fac[[3]][,1], day=c(-14,0,2,5,9,14,21)) %>%
  ggplot(aes(x=day,y=Component_1)) +
  annotate(geom = "rect", xmin = 0, xmax = 14, ymin = -Inf, ymax = Inf, fill = "red", colour = "black", alpha=0.25) +
  geom_line() +
  geom_point() +
  xlab("Time point [days]") +
  ylab("Loading") +
  ylim(0,1) +
  theme(legend.position="none", text=element_text(size=16))

a

b

c

testFeatures(npls_tongue, processedTongue, 2, "age") # flip
#> Joining with `by = join_by(subject, RFgroup)`
#> Joining with `by = join_by(subject, age, gender)`
#> [1] "Positive loadings:"
#>             age
#> [1,] -0.2936364
#> [1] "Negative loadings:"
#>            age
#> [1,] 0.2936364

a = processedTongue$mode1 %>%
  mutate(Component_1 = npls_tongue$Fac[[1]][,2]) %>%
  left_join(rf_data %>% select(subject,day,Area_delta_R30,plaquepercent,bomppercent) %>% filter(day==14), by="subject") %>%
  ggplot(aes(x=age,y=Component_1)) +
  geom_point() +
  xlab("Age") +
  ylab("Loading") +
  stat_cor() +
  theme(text=element_text(size=16))

b = plotFeatures(processedTongue$mode2, npls_tongue, 2, flip=TRUE) + scale_fill_manual(values=phylum_colors[-c(6,7)]) + theme(legend.position="none")

c = processedTongue$mode3 %>%
  mutate(Component_1 = npls_tongue$Fac[[3]][,2], day=c(-14,0,2,5,9,14,21)) %>%
  ggplot(aes(x=day,y=Component_1)) +
  annotate(geom = "rect", xmin = 0, xmax = 14, ymin = -Inf, ymax = Inf, fill = "red", colour = "black", alpha=0.25) +
  geom_line() +
  geom_point() +
  xlab("Time point [days]") +
  ylab("Loading") +
  ylim(0,1) +
  theme(legend.position="none", text=element_text(size=16))

a

b

c

testFeatures(npls_tongue, processedTongue, 3, "age") # flip
#> Joining with `by = join_by(subject, RFgroup)`
#> Joining with `by = join_by(subject, age, gender)`
#> [1] "Positive loadings:"
#>             age
#> [1,] -0.5022835
#> [1] "Negative loadings:"
#>            age
#> [1,] 0.5022835
testFeatures(npls_tongue, processedTongue, 3, "plaquepercent") # flip
#> Joining with `by = join_by(subject, RFgroup)`
#> Joining with `by = join_by(subject, age, gender)`
#> [1] "Positive loadings:"
#>      plaquepercent
#> [1,]     0.4065813
#> [1] "Negative loadings:"
#>      plaquepercent
#> [1,]    -0.4065813

a = processedTongue$mode1 %>%
  mutate(Component_1 = npls_tongue$Fac[[1]][,3]) %>%
  left_join(rf_data %>% select(subject,day,Area_delta_R30,plaquepercent,bomppercent) %>% filter(day==14), by="subject") %>%
  ggplot(aes(x=age,y=Component_1)) +
  geom_point() +
  xlab("Age") +
  ylab("Loading") +
  stat_cor() +
  theme(text=element_text(size=16))

b = plotFeatures(processedTongue$mode2, npls_tongue, 3, flip=TRUE) + scale_fill_manual(values=phylum_colors[-c(3,7)]) + theme(legend.position="none")

c = processedTongue$mode3 %>%
  mutate(Component_1 = -1*npls_tongue$Fac[[3]][,3], day=c(-14,0,2,5,9,14,21)) %>%
  ggplot(aes(x=day,y=Component_1)) +
  annotate(geom = "rect", xmin = 0, xmax = 14, ymin = -Inf, ymax = Inf, fill = "red", colour = "black", alpha=0.25) +
  geom_line() +
  geom_point() +
  xlab("Time point [days]") +
  ylab("Loading") +
  ylim(0,1) +
  theme(legend.position="none", text=element_text(size=16))

a

b

c

Lower jaw lingual microbiome

testMetadata(npls_lowling, 1, processedLowling)
#>             Term  Estimate                                          CI
#> 1  plaquepercent  3.715363 -0.000645113811345087 – 0.00807583996130901
#> 2    bomppercent  2.342250 -0.000382021577102148 – 0.00506652135078373
#> 3 Area_delta_R30 13.104425    0.00761071083531463 – 0.0185981388922776
#> 4         gender 73.153268    -0.00420146304171221 – 0.150507999448965
#> 5            age  2.369166  -0.00452078411448064 – 0.00925911597240414
#>        P_value     P_adjust
#> 1 9.248392e-02 0.1156049046
#> 2 8.968515e-02 0.1156049046
#> 3 2.579248e-05 0.0001289624
#> 4 6.305775e-02 0.1156049046
#> 5 4.897460e-01 0.4897460333
testMetadata(npls_lowling, 2, processedLowling)
#>             Term    Estimate                                         CI
#> 1  plaquepercent   -4.114017  -0.0101326963347122 – 0.00190466133665912
#> 2    bomppercent   -2.165860 -0.00592611691772569 – 0.00159439766365394
#> 3 Area_delta_R30    1.603245 -0.00597961861614078 – 0.00918610807591475
#> 4         gender -108.454959  -0.215226144055683 – -0.00168377429959159
#> 5            age   -2.999926  -0.0125099860507945 – 0.00651013380601685
#>      P_value  P_adjust
#> 1 0.17401162 0.4169640
#> 2 0.25017839 0.4169640
#> 3 0.67038968 0.6703897
#> 4 0.04668113 0.2334057
#> 5 0.52608794 0.6576099
testFeatures(npls_lowling, processedLowling, 1, "Area_delta_R30") # flip
#> Joining with `by = join_by(subject, RFgroup)`
#> Joining with `by = join_by(subject, age, gender)`
#> [1] "Positive loadings:"
#>      Area_delta_R30
#> [1,]     -0.6772569
#> [1] "Negative loadings:"
#>      Area_delta_R30
#> [1,]      0.6772569

a = processedLowling$mode1 %>%
  mutate(Component_1 = npls_lowling$Fac[[1]][,1]) %>%
  left_join(rf_data %>% select(subject,day,Area_delta_R30,plaquepercent,bomppercent) %>% filter(day==14), by="subject") %>%
  ggplot(aes(x=Area_delta_R30,y=Component_1)) +
  geom_point() +
  xlab("RF%") +
  ylab("Loading") +
  stat_cor() +
  theme(text=element_text(size=16))

b = plotFeatures(processedLowling$mode2, npls_lowling, 1, flip=TRUE) + scale_fill_manual(values=phylum_colors[-7]) + theme(legend.position="none")

c = processedLowling$mode3 %>%
  mutate(Component_1 = -1*npls_lowling$Fac[[3]][,1], day=c(-14,0,2,5,9,14,21)) %>%
  ggplot(aes(x=day,y=Component_1)) +
  annotate(geom = "rect", xmin = 0, xmax = 14, ymin = -Inf, ymax = Inf, fill = "red", colour = "black", alpha=0.25) +
  geom_line() +
  geom_point() +
  xlab("Time point [days]") +
  ylab("Loading") +
  ylim(0,1) +
  theme(legend.position="none", text=element_text(size=16))

a

b

c

testFeatures(npls_lowling, processedLowling, 2, "gender") # no flip
#> Joining with `by = join_by(subject, RFgroup)`
#> Joining with `by = join_by(subject, age, gender)`
#> [1] "Positive loadings:"
#>         gender
#> [1,] 0.2793231
#> [1] "Negative loadings:"
#>          gender
#> [1,] -0.2793231

a = processedLowling$mode1 %>%
  mutate(Component_1 = npls_lowling$Fac[[1]][,2]) %>%
  left_join(rf_data %>% select(subject,day,Area_delta_R30,plaquepercent,bomppercent) %>% filter(day==14), by="subject") %>%
  ggplot(aes(x=as.factor(gender),y=Component_1)) +
  geom_boxplot() +
  xlab("Gender") +
  ylab("Loading") +
  scale_x_discrete(labels=c("Male","Female")) +
  theme(text=element_text(size=16))

b = plotFeatures(processedLowling$mode2, npls_lowling, 2, flip=FALSE) + scale_fill_manual(values=phylum_colors[-7]) + theme(legend.position="none")

c = processedLowling$mode3 %>%
  mutate(Component_1 = -1*npls_lowling$Fac[[3]][,2], day=c(-14,0,2,5,9,14,21)) %>%
  ggplot(aes(x=day,y=Component_1)) +
  annotate(geom = "rect", xmin = 0, xmax = 14, ymin = -Inf, ymax = Inf, fill = "red", colour = "black", alpha=0.25) +
  geom_line() +
  geom_point() +
  xlab("Time point [days]") +
  ylab("Loading") +
  ylim(0,1) +
  theme(legend.position="none", text=element_text(size=16))

a

b

c

Lower jaw interproximal microbiome

testMetadata(npls_lowinter, 1, processedLowinter)
#>             Term   Estimate                                         CI
#> 1  plaquepercent  3.3096526 -0.00145265904366952 – 0.00807196416821377
#> 2    bomppercent -0.8384309 -0.00381375443174476 – 0.00213689268722177
#> 3 Area_delta_R30 15.2208403   0.00922085935199349 – 0.0212208211724995
#> 4         gender 69.1352412    -0.0153480260964937 – 0.153618508522373
#> 5            age  2.8573140  -0.00466757148278084 – 0.0103821994077065
#>        P_value     P_adjust
#> 1 1.671132e-01 2.785220e-01
#> 2 5.709283e-01 5.709283e-01
#> 3 1.019151e-05 5.095753e-05
#> 4 1.055865e-01 2.639664e-01
#> 5 4.459610e-01 5.574513e-01
testMetadata(npls_lowinter, 2, processedLowinter)
#>             Term    Estimate                                         CI
#> 1  plaquepercent  -4.0343329  -0.0103961845528384 – 0.00232751885082657
#> 2    bomppercent   1.5868511   -0.002387808391622 – 0.00556151068560298
#> 3 Area_delta_R30  -0.7891306 -0.00880435348664343 – 0.00722609219743649
#> 4         gender -79.4547914    -0.192313852803302 – 0.0334042699832537
#> 5            age  -5.6192399  -0.0156715441175292 – 0.00443306441593625
#>     P_value  P_adjust
#> 1 0.2064097 0.4402644
#> 2 0.4231274 0.5289092
#> 3 0.8427373 0.8427373
#> 4 0.1618029 0.4402644
#> 5 0.2641586 0.4402644
testMetadata(npls_lowinter, 3, processedLowinter)
#>             Term    Estimate                                         CI
#> 1  plaquepercent   1.9616635 -0.00441084473003135 – 0.00833417182067836
#> 2    bomppercent  -2.5562100 -0.00653752736785625 – 0.00142510740850312
#> 3 Area_delta_R30  -0.7397026 -0.00876835152455174 – 0.00728894634518655
#> 4         gender -38.8830025     -0.15193111123446 – 0.0741651061515203
#> 5            age   5.9104434   -0.0041586991794963 – 0.0159795860646214
#>     P_value  P_adjust
#> 1 0.5360685 0.6700857
#> 2 0.2009361 0.6035466
#> 3 0.8527095 0.8527095
#> 4 0.4896298 0.6700857
#> 5 0.2414186 0.6035466
testFeatures(npls_lowinter, processedLowinter, 1, "Area_delta_R30") # NO flip
#> Joining with `by = join_by(subject, RFgroup)`
#> Joining with `by = join_by(subject, age, gender)`
#> [1] "Positive loadings:"
#>      Area_delta_R30
#> [1,]      0.7072254
#> [1] "Negative loadings:"
#>      Area_delta_R30
#> [1,]     -0.7072254

a = processedLowinter$mode1 %>%
  mutate(Component_1 = npls_lowinter$Fac[[1]][,1]) %>%
  left_join(rf_data %>% select(subject,day,Area_delta_R30,plaquepercent,bomppercent) %>% filter(day==14), by="subject") %>%
  ggplot(aes(x=Area_delta_R30,y=Component_1)) +
  geom_point() +
  xlab("RF%") +
  ylab("Loading") +
  stat_cor() +
  theme(text=element_text(size=16))

b = plotFeatures(processedLowinter$mode2, npls_lowinter, 1, flip=FALSE) + scale_fill_manual(values=phylum_colors[-7]) + theme(legend.position="none")

c = processedLowinter$mode3 %>%
  mutate(Component_1 = npls_lowinter$Fac[[3]][,1], day=c(-14,0,2,5,9,14,21)) %>%
  ggplot(aes(x=day,y=Component_1)) +
  annotate(geom = "rect", xmin = 0, xmax = 14, ymin = -Inf, ymax = Inf, fill = "red", colour = "black", alpha=0.25) +
  geom_line() +
  geom_point() +
  xlab("Time point [days]") +
  ylab("Loading") +
  ylim(0,1) +
  theme(legend.position="none", text=element_text(size=16))

a

b

c

Upper jaw lingual microbiome

testMetadata(npls_upling, 1, processedUpling)
#>             Term   Estimate                                          CI
#> 1  plaquepercent  4.0478033 -0.000770578431992336 – 0.00886618505872002
#> 2    bomppercent  1.2401645  -0.00177018966493134 – 0.00425051872704196
#> 3 Area_delta_R30 13.2165671    0.00714594404762599 – 0.0192871900989676
#> 4         gender 74.4980064     -0.0109799434918638 – 0.159975956363131
#> 5            age -0.7702828  -0.00838376420630882 – 0.00684319852331261
#>        P_value     P_adjust
#> 1 9.697323e-02 0.1616220476
#> 2 4.086397e-01 0.5107996315
#> 3 9.122167e-05 0.0004561084
#> 4 8.555172e-02 0.1616220476
#> 5 8.384549e-01 0.8384548793
model = npls_upling
df = processedUpling

testFeatures(model, df, 1, "Area_delta_R30") # NO flip
#> Joining with `by = join_by(subject, RFgroup)`
#> Joining with `by = join_by(subject, age, gender)`
#> [1] "Positive loadings:"
#>      Area_delta_R30
#> [1,]      0.6110045
#> [1] "Negative loadings:"
#>      Area_delta_R30
#> [1,]     -0.6110045

a = df$mode1 %>%
  mutate(Component_1 = model$Fac[[1]][,1]) %>%
  left_join(rf_data %>% select(subject,day,Area_delta_R30,plaquepercent,bomppercent) %>% filter(day==14), by="subject") %>%
  ggplot(aes(x=Area_delta_R30,y=Component_1)) +
  geom_point() +
  xlab("RF%") +
  ylab("Loading") +
  stat_cor() +
  theme(text=element_text(size=16))

b = plotFeatures(df$mode2, model, 1, flip=FALSE) + scale_fill_manual(values=phylum_colors[-c(3,7)]) + theme(legend.position="none")

c = df$mode3 %>%
  mutate(Component_1 = model$Fac[[3]][,1], day=c(-14,0,2,5,9,14,21)) %>%
  ggplot(aes(x=day,y=Component_1)) +
  annotate(geom = "rect", xmin = 0, xmax = 14, ymin = -Inf, ymax = Inf, fill = "red", colour = "black", alpha=0.25) +
  geom_line() +
  geom_point() +
  xlab("Time point [days]") +
  ylab("Loading") +
  ylim(0,1) +
  theme(legend.position="none", text=element_text(size=16))

a

b

c

Upper jaw interproximal microbiome

testMetadata(npls_upinter, 1, processedUpinter)
#>             Term  Estimate                                          CI
#> 1  plaquepercent  3.339033  -0.00145139242610112 – 0.00812945878601939
#> 2    bomppercent  2.408691 -0.000584197467843378 – 0.00540157891347544
#> 3 Area_delta_R30 11.841794    0.00580639255553114 – 0.0178771953784663
#> 4         gender 81.810902     -0.00317110717769579 – 0.16679291050557
#> 5            age  1.978115  -0.00559119347772117 – 0.00954742276678969
#>        P_value    P_adjust
#> 1 0.1658958036 0.207369755
#> 2 0.1112570321 0.185428387
#> 3 0.0003277558 0.001638779
#> 4 0.0586855593 0.146713898
#> 5 0.5990911027 0.599091103
model = npls_upinter
df = processedUpinter

testFeatures(model, df, 1, "Area_delta_R30") # NO flip
#> Joining with `by = join_by(subject, RFgroup)`
#> Joining with `by = join_by(subject, age, gender)`
#> [1] "Positive loadings:"
#>      Area_delta_R30
#> [1,]      0.5857467
#> [1] "Negative loadings:"
#>      Area_delta_R30
#> [1,]     -0.5857467

a = df$mode1 %>%
  mutate(Component_1 = model$Fac[[1]][,1]) %>%
  left_join(rf_data %>% select(subject,day,Area_delta_R30,plaquepercent,bomppercent) %>% filter(day==14), by="subject") %>%
  ggplot(aes(x=Area_delta_R30,y=Component_1)) +
  geom_point() +
  xlab("RF%") +
  ylab("Loading") +
  stat_cor() +
  theme(text=element_text(size=16))

b = plotFeatures(df$mode2, model, 1, flip=FALSE) + scale_fill_manual(values=phylum_colors[-c(3,7)]) + theme(legend.position="none")

c = df$mode3 %>%
  mutate(Component_1 = model$Fac[[3]][,1], day=c(-14,0,2,5,9,14,21)) %>%
  ggplot(aes(x=day,y=Component_1)) +
  annotate(geom = "rect", xmin = 0, xmax = 14, ymin = -Inf, ymax = Inf, fill = "red", colour = "black", alpha=0.25) +
  geom_line() +
  geom_point() +
  xlab("Time point [days]") +
  ylab("Loading") +
  ylim(0,1) +
  theme(legend.position="none", text=element_text(size=16))

a

b

c

Salivary microbiome

testMetadata(npls_saliva, 1, processedSaliva)
#>             Term  Estimate                                          CI
#> 1  plaquepercent  0.517096  -0.00463103466799416 – 0.00566522660467623
#> 2    bomppercent  2.351534 -0.000864835629191632 – 0.00556790361049825
#> 3 Area_delta_R30 12.091719    0.00560564942822247 – 0.0185777890517456
#> 4         gender 66.576150     -0.0247515353456588 – 0.157903835693377
#> 5            age  2.281081   -0.00585343328619292 – 0.0104155959168686
#>        P_value    P_adjust
#> 1 0.8396040573 0.839604057
#> 2 0.1466959221 0.246404995
#> 3 0.0005792527 0.002896263
#> 4 0.1478429972 0.246404995
#> 5 0.5727997357 0.715999670
model = npls_saliva
df = processedSaliva

testFeatures(model, df, 1, "Area_delta_R30") # NO flip
#> Joining with `by = join_by(subject, RFgroup)`
#> Joining with `by = join_by(subject, age, gender)`
#> [1] "Positive loadings:"
#>      Area_delta_R30
#> [1,]      0.5764045
#> [1] "Negative loadings:"
#>      Area_delta_R30
#> [1,]     -0.5764045

a = df$mode1 %>%
  mutate(Component_1 = model$Fac[[1]][,1]) %>%
  left_join(rf_data %>% select(subject,day,Area_delta_R30,plaquepercent,bomppercent) %>% filter(day==14), by="subject") %>%
  ggplot(aes(x=Area_delta_R30,y=Component_1)) +
  geom_point() +
  xlab("RF%") +
  ylab("Loading") +
  stat_cor() +
  theme(text=element_text(size=16))

b = plotFeatures(df$mode2, model, 1, flip=FALSE) + scale_fill_manual(values=phylum_colors[-c(3,7)]) + theme(legend.position="none")

c = df$mode3 %>%
  mutate(Component_1 = model$Fac[[3]][,1], day=c(-14,0,2,5,9,14,21)) %>%
  ggplot(aes(x=day,y=Component_1)) +
  annotate(geom = "rect", xmin = 0, xmax = 14, ymin = -Inf, ymax = Inf, fill = "red", colour = "black", alpha=0.25) +
  geom_line() +
  geom_point() +
  xlab("Time point [days]") +
  ylab("Loading") +
  ylim(0,1) +
  theme(legend.position="none", text=element_text(size=16))

a

b

c

Salivary metabolomics

testMetadata(npls_metab, 1, processedMetabolomics)
#>             Term   Estimate                                         CI
#> 1  plaquepercent   4.293501 -0.00105841095712955 – 0.00964541325008825
#> 2    bomppercent   2.176664 -0.00109724071284544 – 0.00545056969051656
#> 3 Area_delta_R30   9.022539   0.00241777824108076 – 0.0156272996462027
#> 4         gender -28.216210    -0.121348837973755 – 0.0649164180705563
#> 5            age  -5.708618  -0.0139889510436936 – 0.00257171604047823
#>       P_value   P_adjust
#> 1 0.112260095 0.23196582
#> 2 0.185572658 0.23196582
#> 3 0.008879154 0.04439577
#> 4 0.542189172 0.54218917
#> 5 0.170255737 0.23196582
model = npls_metab
df = processedMetabolomics

testFeatures(model, df, 1, "Area_delta_R30") # NO flip
#> Joining with `by = join_by(subject, RFgroup)`
#> Joining with `by = join_by(subject, age, gender)`
#> [1] "Positive loadings:"
#>      Area_delta_R30
#> [1,]      0.5066373
#> [1] "Negative loadings:"
#>      Area_delta_R30
#> [1,]     -0.5066373

a = df$mode1 %>%
  mutate(Component_1 = model$Fac[[1]][,1]) %>%
  left_join(rf_data %>% select(subject,day,Area_delta_R30,plaquepercent,bomppercent) %>% filter(day==14), by="subject") %>%
  ggplot(aes(x=Area_delta_R30,y=Component_1)) +
  geom_point() +
  xlab("RF%") +
  ylab("Loading") +
  stat_cor() +
  theme(text=element_text(size=16))

temp = processedMetabolomics$mode2 %>% mutate(Component_1 = npls_metab$Fac[[2]][,1]) %>% arrange(Component_1) %>% mutate(index=1:nrow(.))
temp = rbind(temp %>% head(n=10), temp %>% tail(n=10))

b = temp %>%
  ggplot(aes(x = Component_1, y = as.factor(index),
             fill = as.factor(Type),
             pattern = as.factor(Type))) +
  geom_bar_pattern(stat = "identity",
                   colour = "black",
                   pattern = "stripe",
                   pattern_fill = "black",
                   pattern_density = 0.2,
                   pattern_spacing = 0.05,
                   pattern_angle = 45,
                   pattern_size = 0.2) +
  scale_y_discrete(labels = temp$Name) +
  xlab("Loading") +
  ylab("") +
  guides(fill = guide_legend(title = "Class"),
         pattern = "none") +
  theme(text = element_text(size = 16),
        legend.position = "none")

c = df$mode3 %>%
  mutate(Component_1 = model$Fac[[3]][,1], day=c(0,2,5,9,14)) %>%
  ggplot(aes(x=day,y=Component_1)) +
  annotate(geom = "rect", xmin = 0, xmax = 14, ymin = -Inf, ymax = Inf, fill = "red", colour = "black", alpha=0.25) +
  geom_line() +
  geom_point() +
  xlab("Time point [days]") +
  ylab("Loading") +
  ylim(0,1) +
  theme(legend.position="none", text=element_text(size=16))

a

b

c