TIFN2: Red fluorescent plaque development in gingivitis

Introduction

This article presents the CP results of the TIFN2 dataset discussed in Chapter 4 of my dissertation.

Preamble

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(tidyr)
library(parafac4microbiome)
library(NPLStoolbox)
library(CMTFtoolbox)

## 
## Attaching package: 'CMTFtoolbox'

## The following object is masked from 'package:NPLStoolbox':
## 
##     npred

## The following objects are masked from 'package:parafac4microbiome':
## 
##     fac_to_vect, reinflateFac, reinflateTensor, vect_to_fac

library(ggplot2)
library(ggpubr)
library(scales)

rf_data = read.csv("../../data-raw/vanderPloeg2024/RFdata.csv")
colnames(rf_data) = c("subject", "id", "fotonr", "day", "group", "RFgroup", "MQH", "SPS(tm)", "Area_delta_R30", "Area_delta_Rmax", "Area_delta_R30_x_Rmax", "gingiva_mean_R_over_G", "gingiva_mean_R_over_G_upper_jaw", "gingiva_mean_R_over_G_lower_jaw")
rf_data = rf_data %>% as_tibble()

rf_data[rf_data$subject == "VSTPHZ", 1] = "VSTPH2"
rf_data[rf_data$subject == "D2VZH0", 1] = "DZVZH0"
rf_data[rf_data$subject == "DLODNN", 1] = "DLODDN"
rf_data[rf_data$subject == "O3VQFX", 1] = "O3VQFQ"
rf_data[rf_data$subject == "F80LGT", 1] = "F80LGF"
rf_data[rf_data$subject == "26QQR0", 1] = "26QQrO"

rf_data2 = read.csv("../../data-raw/vanderPloeg2024/red_fluorescence_data.csv") %>% as_tibble()
rf_data2 = rf_data2[,c(2,4,181:192)]
rf_data = rf_data %>% left_join(rf_data2)

## Joining with `by = join_by(id, day)`

rf = rf_data %>% select(subject, RFgroup) %>% unique()

age_gender = read.csv("../../data-raw/vanderPloeg2024/Ploeg_subjectMetadata.csv", sep=";")
age_gender = age_gender[2:nrow(age_gender),2:ncol(age_gender)]
age_gender = age_gender %>% as_tibble() %>% filter(onderzoeksgroep == 0) %>% select(naam, leeftijd, geslacht)
colnames(age_gender) = c("subject", "age", "gender")

# Correction for incorrect subject ids
age_gender[age_gender$subject == "VSTPHZ", 1] = "VSTPH2"
age_gender[age_gender$subject == "D2VZH0", 1] = "DZVZH0"
age_gender[age_gender$subject == "DLODNN", 1] = "DLODDN"
age_gender[age_gender$subject == "O3VQFX", 1] = "O3VQFQ"
age_gender[age_gender$subject == "F80LGT", 1] = "F80LGF"
age_gender[age_gender$subject == "26QQR0", 1] = "26QQrO"

age_gender = age_gender %>% arrange(subject)

mapping = c(-14,0,2,5,9,14,21)

testMetadata = function(model, comp, metadata){
  transformedSubjectLoadings = model$Fac[[1]][,comp]
  transformedSubjectLoadings = transformedSubjectLoadings / norm(transformedSubjectLoadings, "2")
  
  metadata = metadata$mode1 %>% left_join(rf_data %>% select(subject,day,Area_delta_R30,plaquepercent,bomppercent) %>% filter(day==14), by="subject")
  
  result = lm(transformedSubjectLoadings ~ plaquepercent + bomppercent + Area_delta_R30 + gender + age, data=metadata)
  
  # Extract coefficients and confidence intervals
  coef_estimates <- summary(result)$coefficients
  conf_intervals <- confint(result)
  
  # Remove intercept
  coef_estimates <- coef_estimates[rownames(coef_estimates) != "(Intercept)", ]
  conf_intervals <- conf_intervals[rownames(conf_intervals) != "(Intercept)", ]
  
  # Combine into a clean data frame
  summary_table <- data.frame(
    Term     = rownames(coef_estimates),
    Estimate = coef_estimates[, "Estimate"] * 1e3,
    CI       = paste0(
                 conf_intervals[, 1], " – ",
                 conf_intervals[, 2]
               ),
    P_value  = coef_estimates[, "Pr(>|t|)"],
    P_adjust = p.adjust(coef_estimates[, "Pr(>|t|)"], "BH"),
    row.names = NULL
  )
  
  return(summary_table)
}

testFeatures = function(model, metadata, componentNum, metadataVar){
  df = metadata$mode1 %>% left_join(rf_data %>% filter(day==14)) %>% left_join(age_gender)
  topIndices = metadata$mode2 %>% mutate(index=1:nrow(.), Comp = model$Fac[[2]][,componentNum]) %>% arrange(desc(Comp)) %>% head() %>% select(index) %>% pull()
  bottomIndices = metadata$mode2 %>% mutate(index=1:nrow(.), Comp = model$Fac[[2]][,componentNum]) %>% arrange(desc(Comp)) %>% tail() %>% select(index) %>% pull()
  
  timepoint = which(abs(model$Fac[[3]][,componentNum]) == max(abs(model$Fac[[3]][,componentNum])))
  
  Xhat = parafac4microbiome::reinflateTensor(model$Fac[[1]][,componentNum], model$Fac[[2]][,componentNum], model$Fac[[3]][,componentNum])
  y = df[metadataVar]
  
  print("Positive loadings:")
  print(cor(Xhat[,topIndices[1],timepoint], y))
  print("Negative loadings:")
  print(cor(Xhat[,bottomIndices[1],timepoint], y))
}

plotFeatures = function(mode2, model, componentNum, flip=FALSE){
  
  if(flip==TRUE){
    df = mode2 %>% mutate(Comp = -1*model$Fac[[2]][,componentNum]) %>% arrange(Comp) %>% filter(Species != "") %>% mutate(index=1:nrow(.), name = paste(Genus, Species))
  } else{
    df = mode2 %>% mutate(Comp = model$Fac[[2]][,componentNum]) %>% arrange(Comp) %>% filter(Species != "") %>% mutate(index=1:nrow(.), name = paste(Genus, Species))
  }
  df = rbind(df %>% head(n=10), df %>% tail(n=10))
  
  plot=df %>%
    ggplot(aes(x=Comp,y=as.factor(index),fill=as.factor(Phylum))) +
    geom_bar(stat="identity", col="black") +
    scale_y_discrete(label=df$name) +
    xlab("Loading") +
    ylab("") +
    guides(fill=guide_legend(title="Phylum")) +
    theme(text=element_text(size=16))
  
  return(plot)
}

phylum_colors = hue_pal()(7)

processedTongue = processDataCube(parafac4microbiome::vanderPloeg2024$tongue, sparsityThreshold=0.50, considerGroups=TRUE, groupVariable="RFgroup", CLR=TRUE, centerMode=1, scaleMode=2)
processedLowling = processDataCube(parafac4microbiome::vanderPloeg2024$lower_jaw_lingual, sparsityThreshold=0.50, considerGroups=TRUE, groupVariable="RFgroup", CLR=TRUE, centerMode=1, scaleMode=2)
processedLowinter = processDataCube(parafac4microbiome::vanderPloeg2024$lower_jaw_interproximal, sparsityThreshold=0.50, considerGroups=TRUE, groupVariable="RFgroup", CLR=TRUE, centerMode=1, scaleMode=2)
processedUpling = processDataCube(parafac4microbiome::vanderPloeg2024$upper_jaw_lingual, sparsityThreshold=0.50, considerGroups=TRUE, groupVariable="RFgroup", CLR=TRUE, centerMode=1, scaleMode=2)
processedUpinter = processDataCube(parafac4microbiome::vanderPloeg2024$upper_jaw_interproximal, sparsityThreshold=0.50, considerGroups=TRUE, groupVariable="RFgroup", CLR=TRUE, centerMode=1, scaleMode=2)
processedSaliva = processDataCube(parafac4microbiome::vanderPloeg2024$saliva, sparsityThreshold=0.50, considerGroups=TRUE, groupVariable="RFgroup", CLR=TRUE, centerMode=1, scaleMode=2)
processedMetabolomics = parafac4microbiome::vanderPloeg2024$metabolomics

colourCols = c("", "", "")
legendTitles = c("RF group", "Phylum", "")
xLabels = c("Subject index", "Feature index", "Time index")
legendColNums = c(3,5,0)
arrangeModes = c(TRUE, TRUE, FALSE)
continuousModes = c(FALSE,FALSE,TRUE)

Tongue microbiome

CP was applied to capture the dominant source of variation in the tongue microbiome data block. The model selection procedure revealed that CORCONDIA scores were inconclusive and loadings were unstable at three components. The commented code below performs the procedure, and is not rerun here due to computational limitations.

# assessment_tongue = parafac4microbiome::assessModelQuality(processedTongue$data, numRepetitions=10, numCores=10)

# assessment_tongue$metrics$CORCONDIA %>% as_tibble() %>% mutate(index=1:nrow(.)) %>% pivot_longer(-index) %>% ggplot(aes(x=as.factor(name),y=value)) + geom_boxplot() + xlab("Number of components") + ylab("CORCONDIA") + theme(text=element_text(size=16))

# stability_tongue = parafac4microbiome::assessModelStability(processedTongue, maxNumComponents=3, numFolds=10, colourCols=colourCols, legendTitles=legendTitles, xLabels = xLabels, legendColNums=legendColNums, arrangeModes=arrangeModes, numCores=parallel::detectCores())
# stability_tongue$modelPlots[[2]]
# stability_tongue$modelPlots[[3]]

# cp_tongue = parafac4microbiome::parafac(processedTongue$data, nfac=2, nstart=100)

cp_tongue = readRDS("./TIFN2/cp_tongue.RDS")
cp_tongue$varExp

## [1] 31.37847

Therefore, a two-component CP model was selected, explaining 31.4% of the variation. However, MLR analysis revealed that the first component captured variation associated with a mixture of plaque% and subject age, while the second component was not interpretable.

testMetadata(cp_tongue, 1, processedTongue)

##             Term  Estimate                                         CI
## 1  plaquepercent -7.085726 -0.0125436561188763 – -0.00162779504204587
## 2    bomppercent  1.189991 -0.00221993077742614 – 0.00459991246706438
## 3 Area_delta_R30  2.527468 -0.00434891510321956 – 0.00940385109507837
## 4         gender 11.109220    -0.0857143066863078 – 0.107932746588392
## 5            age 13.793423   0.00516939627811755 – 0.0224174495211815
##       P_value   P_adjust
## 1 0.012433782 0.03108445
## 2 0.483348754 0.60418594
## 3 0.460540316 0.60418594
## 4 0.817174114 0.81717411
## 5 0.002572981 0.01286491

testMetadata(cp_tongue, 2, processedTongue)

##             Term   Estimate                                         CI
## 1  plaquepercent  -0.877180 -0.00719453415388883 – 0.00544017410099945
## 2    bomppercent  -2.057600  -0.0060044593190253 – 0.00188925874240208
## 3 Area_delta_R30  -5.035993   -0.012995154144287 – 0.00292316758866404
## 4         gender -42.102114    -0.154171789877283 – 0.0699675615970461
## 5            age   2.246435  -0.00773555910142959 – 0.0122284290033156
##     P_value  P_adjust
## 1 0.7796915 0.7796915
## 2 0.2971467 0.7428669
## 3 0.2074012 0.7428669
## 4 0.4507719 0.7512864
## 5 0.6505852 0.7796915

testFeatures(cp_tongue, processedTongue, 1, "plaquepercent") # no flip

## Joining with `by = join_by(subject, RFgroup)`
## Joining with `by = join_by(subject, age, gender)`

## [1] "Positive loadings:"
##      plaquepercent
## [1,]     0.3063565
## [1] "Negative loadings:"
##      plaquepercent
## [1,]    -0.3063565

testFeatures(cp_tongue, processedTongue, 1, "age") # flip

## Joining with `by = join_by(subject, RFgroup)`
## Joining with `by = join_by(subject, age, gender)`

## [1] "Positive loadings:"
##             age
## [1,] -0.4538388
## [1] "Negative loadings:"
##            age
## [1,] 0.4538388

a = processedTongue$mode1 %>%
  mutate(Component_1 = cp_tongue$Fac[[1]][,1]) %>%
  left_join(rf_data %>% select(subject,day,Area_delta_R30,plaquepercent,bomppercent) %>% filter(day==14), by="subject") %>%
  ggplot(aes(x=plaquepercent,y=Component_1)) +
  geom_point() +
  xlab("Plaque%") +
  ylab("Loading") +
  stat_cor() +
  theme(text=element_text(size=16))

b = plotFeatures(processedTongue$mode2, cp_tongue, 1, flip=FALSE) + scale_fill_manual(values=phylum_colors[-c(3,7)]) + theme(legend.position="none")

c = processedTongue$mode3 %>%
  mutate(Component_1 = -1*cp_tongue$Fac[[3]][,1], day=c(-14,0,2,5,9,14,21)) %>%
  ggplot(aes(x=day,y=Component_1)) +
  annotate(geom = "rect", xmin = 0, xmax = 14, ymin = -Inf, ymax = Inf, fill = "red", colour = "black", alpha=0.25) +
  geom_line() +
  geom_point() +
  xlab("Time point [days]") +
  ylab("Loading") +
  ylim(0,1) +
  theme(legend.position="none", text=element_text(size=16))

a

Lower jaw lingual microbiome

CP was applied to capture the dominant source of variation in the lower jaw lingual microbiome data block. The model selection procedure revealed CORCONDIA scores were too low at two components. The commented code below performs the procedure, and is not rerun here due to computational limitations.

# assessment_lowling = parafac4microbiome::assessModelQuality(processedLowling$data, numRepetitions=10, numCores=10)

# assessment_lowling$metrics$CORCONDIA %>% as_tibble() %>% mutate(index=1:nrow(.)) %>% pivot_longer(-index) %>% ggplot(aes(x=as.factor(name),y=value)) + geom_boxplot() + xlab("Number of components") + ylab("CORCONDIA") + theme(text=element_text(size=16))

# stability_lowling = parafac4microbiome::assessModelStability(processedLowling, maxNumComponents=2, numFolds=10, colourCols=colourCols, legendTitles=legendTitles, xLabels = xLabels, legendColNums=legendColNums, arrangeModes=arrangeModes, numCores=parallel::detectCores())
# stability_lowling$modelPlots[[1]]
# stability_lowling$modelPlots[[2]]

# cp_lowling = parafac4microbiome::parafac(processedLowling$data, nfac=1, nstart=100)
cp_lowling = readRDS("./TIFN2/cp_lowling.RDS")

cp_lowling$varExp

## [1] 10.55114

Therefore, a one-component CP model was selected, explaining 10.6% of the variation. Surprisingly, MLR analysis revealed that the CP component captured variation exclusively associated with RF%.

testMetadata(cp_lowling, 1, processedLowling)

##             Term   Estimate                                          CI
## 1  plaquepercent  -4.047703 -0.00897953402999654 – 0.000884127338494313
## 2    bomppercent  -2.597078 -0.00567831104112229 – 0.000484155105742315
## 3 Area_delta_R30 -10.235675  -0.0164492308704168 – -0.00402211882627686
## 4         gender -83.222465    -0.170712996229106 – 0.00426806527981913
## 5            age  -2.544554    -0.0103372952848458 – 0.0052481867268659
##      P_value    P_adjust
## 1 0.10460587 0.130757338
## 2 0.09591003 0.130757338
## 3 0.00197737 0.009886849
## 4 0.06160528 0.130757338
## 5 0.51174413 0.511744134

testFeatures(cp_lowling, processedLowling, 1, "Area_delta_R30") # flip

## Joining with `by = join_by(subject, RFgroup)`
## Joining with `by = join_by(subject, age, gender)`

## [1] "Positive loadings:"
##      Area_delta_R30
## [1,]     -0.5298343
## [1] "Negative loadings:"
##      Area_delta_R30
## [1,]      0.5298343

a = processedLowling$mode1 %>%
  mutate(Component_1 = cp_lowling$Fac[[1]][,1]) %>%
  left_join(rf_data %>% select(subject,day,Area_delta_R30,plaquepercent,bomppercent) %>% filter(day==14), by="subject") %>%
  ggplot(aes(x=Area_delta_R30,y=Component_1)) +
  geom_point() +
  xlab("RF%") +
  ylab("Loading") +
  stat_cor() +
  theme(text=element_text(size=16))

b = plotFeatures(processedLowling$mode2, cp_lowling, 1, flip=TRUE) + scale_fill_manual(values=phylum_colors[-7]) + theme(legend.position="none")

c = processedLowling$mode3 %>%
  mutate(Component_1 = cp_lowling$Fac[[3]][,1], day=c(-14,0,2,5,9,14,21)) %>%
  ggplot(aes(x=day,y=Component_1)) +
  annotate(geom = "rect", xmin = 0, xmax = 14, ymin = -Inf, ymax = Inf, fill = "red", colour = "black", alpha=0.25) +
  geom_line() +
  geom_point() +
  xlab("Time point [days]") +
  ylab("Loading") +
  ylim(0,1) +
  theme(legend.position="none", text=element_text(size=16))

a

Positively loaded microbiota including Fusobacterium canifelinum / nucleatum, Aggregatibacter segnis, Capnocytophaga leadbetteri, Campylobacter concisus, Fusobacterium hwasookii / nucleatum / periodonticum, Cardiobacterium hominis, Porphyromonas pasteri, Corynebacterium matruchotii, Capnocytophaga granulosa, and Fusobacterium nucleatum were enriched in high responders and depleted in low responders. Conversely, negatively loaded microbiota including Campylobacter concisus, Rothia dentocariosa, Veillonella atypica / dispar, Atopobium parvulum, Prevotella melaninogenica, Veillonella dispar / parvula, Veillonella parvula / rogosae / tobetsuensis, Actinomyces naeslundii / oris / viscosus, Prevotella nanceiensis, and Kingella oralis were enriched in low responders and depleted in high responders. The time mode loadings of the component indicate that the inter-subject differences due to RF% were constant throughout the time series.

Lower jaw interproximal microbiome

CP was applied to capture the dominant source of variation in the lower jaw interproximal microbiome data block. The model selection procedure revealed that CORCONDIA scores were too low at three components. The commented code below performs the procedure, and is not rerun here due to computational limitations.

# assessment_lowinter = parafac4microbiome::assessModelQuality(processedLowinter$data, numRepetitions=10, numCores=10)

# assessment_lowinter$metrics$CORCONDIA %>% as_tibble() %>% mutate(index=1:nrow(.)) %>% pivot_longer(-index) %>% ggplot(aes(x=as.factor(name),y=value)) + geom_boxplot() + xlab("Number of components") + ylab("CORCONDIA") + theme(text=element_text(size=16))

# 
# stability_lowinter = parafac4microbiome::assessModelStability(processedLowinter, maxNumComponents=3, numFolds=10, colourCols=colourCols, legendTitles=legendTitles, xLabels = xLabels, legendColNums=legendColNums, arrangeModes=arrangeModes, numCores=parallel::detectCores())
# stability_lowinter$modelPlots[[2]]
# stability_lowinter$modelPlots[[3]]
#

# cp_lowinter = parafac4microbiome::parafac(processedLowinter$data, nfac=2, nstart=100)
cp_lowinter = readRDS("./TIFN2/cp_lowinter.RDS")

cp_lowinter$varExp

## [1] 10.32969

Therefore, a two-component CP model was selected, explaining 10.3% of the variation. However, MLR analysis revealed that the first CP component captured uninterpretable variation, while the second component captured variation exclusively associated with BOMP%.

testMetadata(cp_lowinter, 1, processedLowinter)

##             Term    Estimate                                          CI
## 1  plaquepercent  -2.8231325  -0.00885260016583814 – 0.00320633515325942
## 2    bomppercent  -3.6808351 -0.00744783286106304 – 8.61626701053834e-05
## 3 Area_delta_R30  -0.6104507  -0.00820690677640243 – 0.00698600534263869
## 4         gender -24.7799778     -0.131742556135758 – 0.0821826004540417
## 5            age  -3.2188894   -0.0127459966584142 – 0.00630821784418125
##      P_value  P_adjust
## 1 0.34835493 0.8013102
## 2 0.05517984 0.2758992
## 3 0.87134707 0.8713471
## 4 0.64104814 0.8013102
## 5 0.49729155 0.8013102

testMetadata(cp_lowinter, 2, processedLowinter)

##             Term  Estimate                                           CI
## 1  plaquepercent  2.156444    -0.00382845328747255 – 0.0081413416846705
## 2    bomppercent -4.091747 -0.00783089859963512 – -0.000352594798941325
## 3 Area_delta_R30  7.257480   -0.000282823087328943 – 0.0147977821470906
## 4         gender 74.027343        -0.032144561763505 – 0.18019924770268
## 5            age  5.422361     -0.0040343211065686 – 0.0148790435409806
##      P_value  P_adjust
## 1 0.46935734 0.4693573
## 2 0.03287750 0.1468343
## 3 0.05873372 0.1468343
## 4 0.16576694 0.2762782
## 5 0.25228070 0.3153509

testFeatures(cp_lowinter, processedLowinter, 2, "bomppercent") # no flip

## Joining with `by = join_by(subject, RFgroup)`
## Joining with `by = join_by(subject, age, gender)`

## [1] "Positive loadings:"
##      bomppercent
## [1,]   0.2341633
## [1] "Negative loadings:"
##      bomppercent
## [1,]  -0.2341633

a = processedLowinter$mode1 %>%
  mutate(Component_1 = cp_lowinter$Fac[[1]][,2]) %>%
  left_join(rf_data %>% select(subject,day,Area_delta_R30,plaquepercent,bomppercent) %>% filter(day==14), by="subject") %>%
  ggplot(aes(x=bomppercent,y=Component_1)) +
  geom_point() +
  xlab("BOMP%") +
  ylab("Loading") +
  stat_cor() +
  theme(text=element_text(size=16))

b = plotFeatures(processedLowinter$mode2, cp_lowinter, 2, flip=FALSE) + scale_fill_manual(values=phylum_colors[-c(3)]) + theme(legend.position="none")

c = processedLowinter$mode3 %>%
  mutate(Component_1 = -1*cp_lowinter$Fac[[3]][,2], day=c(-14,0,2,5,9,14,21)) %>%
  ggplot(aes(x=day,y=Component_1)) +
  annotate(geom = "rect", xmin = 0, xmax = 14, ymin = -Inf, ymax = Inf, fill = "red", colour = "black", alpha=0.25) +
  geom_line() +
  geom_point() +
  xlab("Time point [days]") +
  ylab("Loading") +
  ylim(0,1) +
  theme(legend.position="none", text=element_text(size=16))

a

Upper jaw lingual microbiome

CP was applied to capture the dominant source of variation in the upper jaw lingual microbiome data block. The model selection procedure revealed that CORCONDIA scores were too low at three components. The commented code below performs the procedure, and is not rerun here due to computational limitations.

# assessment_upling = parafac4microbiome::assessModelQuality(processedUpling$data, numRepetitions=10, numCores=10)

# assessment_upling$metrics$CORCONDIA %>% as_tibble() %>% mutate(index=1:nrow(.)) %>% pivot_longer(-index) %>% ggplot(aes(x=as.factor(name),y=value)) + geom_boxplot() + xlab("Number of components") + ylab("CORCONDIA") + theme(text=element_text(size=16))

# stability_upling = parafac4microbiome::assessModelStability(processedUpling, maxNumComponents=3, numFolds=10, colourCols=colourCols, legendTitles=legendTitles, xLabels = xLabels, legendColNums=legendColNums, arrangeModes=arrangeModes, numCores=parallel::detectCores())
# stability_upling$modelPlots[[2]]
# stability_upling$modelPlots[[3]]
#

# cp_upling = parafac4microbiome::parafac(processedUpling$data, nfac=2, nstart=100)
cp_upling = readRDS("./TIFN2/cp_upling.RDS")

cp_upling$varExp

## [1] 19.19163

Therefore, a two-component CP model was selected, explaining 19.2% of the variation. However, MLR analysis revealed that the first CP component captured uninterpretable variation, while the second component captured variation exclusively but weakly associated with RF%.

testMetadata(cp_upling, 1, processedUpling)

##             Term   Estimate                                          CI
## 1  plaquepercent  3.5957413  -0.00193167844376722 – 0.00912316112826477
## 2    bomppercent  2.5813448 -0.000871991211114323 – 0.00603468087307641
## 3 Area_delta_R30  7.9435166   0.000979584806484587 – 0.0149074483917542
## 4         gender 78.4426156     -0.0196136482063289 – 0.176498879470703
## 5            age -0.4196712  -0.00915349714410007 – 0.00831415476076494
##      P_value  P_adjust
## 1 0.19519152 0.2439894
## 2 0.13812418 0.2302070
## 3 0.02655809 0.1327905
## 4 0.11334141 0.2302070
## 5 0.92284676 0.9228468

testMetadata(cp_upling, 2, processedUpling)

##             Term    Estimate                                          CI
## 1  plaquepercent  -2.5436852   -0.0086495593078846 – 0.00356218888854485
## 2    bomppercent  -0.1702389   -0.0039849726780106 – 0.00364449492465157
## 3 Area_delta_R30  -8.1288343 -0.0158215539877252 – -0.000436114704840301
## 4         gender -85.1621897     -0.193480215958621 – 0.0231558365641216
## 5            age  -0.6466179   -0.0102944542293067 – 0.00900121851702191
##      P_value  P_adjust
## 1 0.40344530 0.6724088
## 2 0.92832935 0.9283294
## 3 0.03895533 0.1947767
## 4 0.11945334 0.2986333
## 5 0.89255185 0.9283294

testFeatures(cp_upling, processedUpling, 2, "Area_delta_R30") # flip

## Joining with `by = join_by(subject, RFgroup)`
## Joining with `by = join_by(subject, age, gender)`

## [1] "Positive loadings:"
##      Area_delta_R30
## [1,]     -0.3280193
## [1] "Negative loadings:"
##      Area_delta_R30
## [1,]      0.3280193

a = processedUpling$mode1 %>%
  mutate(Component_1 = cp_upling$Fac[[1]][,2]) %>%
  left_join(rf_data %>% select(subject,day,Area_delta_R30,plaquepercent,bomppercent) %>% filter(day==14), by="subject") %>%
  ggplot(aes(x=Area_delta_R30,y=Component_1)) +
  geom_point() +
  xlab("RF%") +
  ylab("Loading") +
  stat_cor() +
  theme(text=element_text(size=16))

b = plotFeatures(processedUpling$mode2, cp_upling, 2, flip=TRUE) + scale_fill_manual(values=phylum_colors[-c(3,7)]) + theme(legend.position="none")

c = processedUpling$mode3 %>%
  mutate(Component_1 = cp_upling$Fac[[3]][,2], day=c(-14,0,2,5,9,14,21)) %>%
  ggplot(aes(x=day,y=Component_1)) +
  annotate(geom = "rect", xmin = 0, xmax = 14, ymin = -Inf, ymax = Inf, fill = "red", colour = "black", alpha=0.25) +
  geom_line() +
  geom_point() +
  xlab("Time point [days]") +
  ylab("Loading") +
  ylim(0,1) +
  theme(legend.position="none", text=element_text(size=16))

a

Upper jaw interproximal microbiome

CP was applied to capture the dominant source of variation in the upper jaw interproximal microbiome data block. The model selection procedure revealed that CORCONDIA scores were inconclusive and loadings were unstable at three components. The commented code below performs the procedure, and is not rerun here due to computational limitations.

# assessment_upinter = parafac4microbiome::assessModelQuality(processedUpinter$data, numRepetitions=10, numCores=10)

# assessment_upinter$metrics$CORCONDIA %>% as_tibble() %>% mutate(index=1:nrow(.)) %>% pivot_longer(-index) %>% ggplot(aes(x=as.factor(name),y=value)) + geom_boxplot() + xlab("Number of components") + ylab("CORCONDIA") + theme(text=element_text(size=16))

# stability_upinter = parafac4microbiome::assessModelStability(processedUpinter, maxNumComponents=3, numFolds=10, colourCols=colourCols, legendTitles=legendTitles, xLabels = xLabels, legendColNums=legendColNums, arrangeModes=arrangeModes, numCores=parallel::detectCores())
# stability_upinter$modelPlots[[2]]
# stability_upinter$modelPlots[[3]]
#

# cp_upinter = parafac4microbiome::parafac(processedUpinter$data, nfac=2, nstart=100)
cp_upinter = readRDS("./TIFN2/cp_upinter.RDS")

cp_upinter$varExp

## [1] 16.10052

Therefore, a two-component CP model was selected, explaining 16.1% of the variation. However, MLR analysis revealed that the first CP component captured variation associated with a mixture of BOMP% and subject gender, while the second component was not interpretable.

testMetadata(cp_upinter, 1, processedUpinter)

##             Term    Estimate                                         CI
## 1  plaquepercent   2.1326839 -0.00328755891087678 – 0.00755292666044199
## 2    bomppercent   3.7419573 0.000355581649636956 – 0.00712833294888875
## 3 Area_delta_R30   6.3702807  -0.000458619993549555 – 0.013199181483497
## 4         gender 114.7064044     0.0185514573648732 – 0.210861351396827
## 5            age  -0.6618349 -0.00922631147351948 – 0.00790264161581984
##      P_value   P_adjust
## 1 0.42980563 0.53725704
## 2 0.03130954 0.07827386
## 3 0.06654502 0.11090837
## 4 0.02076356 0.07827386
## 5 0.87624079 0.87624079

testMetadata(cp_upinter, 2, processedUpinter)

##             Term   Estimate                                         CI
## 1  plaquepercent -2.4316053 -0.00867251409160288 – 0.00380930341158905
## 2    bomppercent  2.7501722 -0.00114892641938689 – 0.00664927091740296
## 3 Area_delta_R30 -0.9668053  -0.0088296535710741 – 0.00689604288502859
## 4         gender 30.5288503    -0.0801846866985154 – 0.141242387353944
## 5            age -7.7000402  -0.0175612435880203 – 0.00216116322441363
##     P_value  P_adjust
## 1 0.4342843 0.7238072
## 2 0.1610410 0.4026024
## 3 0.8043410 0.8043410
## 4 0.5791848 0.7239810
## 5 0.1219174 0.4026024

testFeatures(cp_upinter, processedUpinter, 1, "bomppercent") # flip

## Joining with `by = join_by(subject, RFgroup)`
## Joining with `by = join_by(subject, age, gender)`

## [1] "Positive loadings:"
##      bomppercent
## [1,]  -0.4565796
## [1] "Negative loadings:"
##      bomppercent
## [1,]   0.4565796

testFeatures(cp_upinter, processedUpinter, 1, "gender") # flip

## Joining with `by = join_by(subject, RFgroup)`
## Joining with `by = join_by(subject, age, gender)`

## [1] "Positive loadings:"
##          gender
## [1,] -0.2166038
## [1] "Negative loadings:"
##         gender
## [1,] 0.2166038

a = processedUpinter$mode1 %>%
  mutate(Component_1 = cp_upinter$Fac[[1]][,1]) %>%
  left_join(rf_data %>% select(subject,day,Area_delta_R30,plaquepercent,bomppercent) %>% filter(day==14), by="subject") %>%
  ggplot(aes(x=bomppercent,y=Component_1)) +
  geom_point() +
  xlab("BOMP%") +
  ylab("Loading") +
  stat_cor() +
  theme(text=element_text(size=16))

b = plotFeatures(processedUpinter$mode2, cp_upinter, 1, flip=TRUE) + scale_fill_manual(values=phylum_colors[-c(3,7)]) + theme(legend.position="none")

c = processedUpinter$mode3 %>%
  mutate(Component_1 = -1*cp_upinter$Fac[[3]][,1], day=c(-14,0,2,5,9,14,21)) %>%
  ggplot(aes(x=day,y=Component_1)) +
  annotate(geom = "rect", xmin = 0, xmax = 14, ymin = -Inf, ymax = Inf, fill = "red", colour = "black", alpha=0.25) +
  geom_line() +
  geom_point() +
  xlab("Time point [days]") +
  ylab("Loading") +
  ylim(0,1) +
  theme(legend.position="none", text=element_text(size=16))

a

Salivary microbiome

CP was applied to capture the dominant source of variation in the salivary microbiome data block. The model selection procedure revealed that CORCONDIA scores were too low at three components. The commented code below performs the procedure, and is not rerun here due to computational limitations.

# assessment_saliva = parafac4microbiome::assessModelQuality(processedSaliva$data, numRepetitions=10, numCores=10)

# assessment_saliva$metrics$CORCONDIA %>% as_tibble() %>% mutate(index=1:nrow(.)) %>% pivot_longer(-index) %>% ggplot(aes(x=as.factor(name),y=value)) + geom_boxplot() + xlab("Number of components") + ylab("CORCONDIA") + theme(text=element_text(size=16))

# stability_saliva = parafac4microbiome::assessModelStability(processedSaliva, maxNumComponents=3, numFolds=10, colourCols=colourCols, legendTitles=legendTitles, xLabels = xLabels, legendColNums=legendColNums, arrangeModes=arrangeModes, numCores=parallel::detectCores())
# stability_saliva$modelPlots[[2]]
# stability_saliva$modelPlots[[3]]
#

# cp_saliva = parafac4microbiome::parafac(processedSaliva$data, nfac=2, nstart=100)
cp_saliva = readRDS("./TIFN2/cp_saliva.RDS")

cp_saliva$varExp

## [1] 17.33499

Therefore, a two-component CP model was selected, explaining 17.3% of the variation. However, MLR analysis revealed that the first CP component captured uninterpretable variation, while the second component captured variation exclusively associated with subject age.

testMetadata(cp_saliva, 1, processedSaliva)

##             Term   Estimate                                         CI
## 1  plaquepercent   2.410738 -0.00355831149506644 – 0.00837978682976563
## 2    bomppercent  -2.504120 -0.00623337039896853 – 0.00122513045951894
## 3 Area_delta_R30  -5.519053  -0.0130393880948642 – 0.00200128290288186
## 4         gender -86.946657    -0.192837413136006 – 0.0189440987425446
## 5            age  -7.035609  -0.0164672494454233 – 0.00239603161672677
##     P_value  P_adjust
## 1 0.4178187 0.4178187
## 2 0.1815319 0.2269149
## 3 0.1452174 0.2269149
## 4 0.1044564 0.2269149
## 5 0.1389100 0.2269149

testMetadata(cp_saliva, 2, processedSaliva)

##             Term   Estimate                                         CI
## 1  plaquepercent   4.868327 -0.000666119508513777 – 0.0104027729980185
## 2    bomppercent   1.049513 -0.00240821269203614 – 0.00450723916772543
## 3 Area_delta_R30   5.576203  -0.00139658153889448 – 0.0125489871748319
## 4         gender  59.330741    -0.0388501724466451 – 0.157511653873589
## 5            age -11.908698 -0.0206536259300458 – -0.00316376911062791
##      P_value  P_adjust
## 1 0.08280442 0.1890952
## 2 0.54175458 0.5417546
## 3 0.11345712 0.1890952
## 4 0.22808875 0.2851109
## 5 0.00903208 0.0451604

testFeatures(cp_saliva, processedSaliva, 2, "age") # no flip

## Joining with `by = join_by(subject, RFgroup)`
## Joining with `by = join_by(subject, age, gender)`

## [1] "Positive loadings:"
##            age
## [1,] 0.3592963
## [1] "Negative loadings:"
##             age
## [1,] -0.3592963

a = processedSaliva$mode1 %>%
  mutate(Component_1 = cp_saliva$Fac[[1]][,2]) %>%
  left_join(rf_data %>% select(subject,day,Area_delta_R30,plaquepercent,bomppercent) %>% filter(day==14), by="subject") %>%
  ggplot(aes(x=age,y=Component_1)) +
  geom_point() +
  xlab("Age") +
  ylab("Loading") +
  stat_cor() +
  theme(text=element_text(size=16))

b = plotFeatures(processedSaliva$mode2, cp_saliva, 2, flip=FALSE) + scale_fill_manual(values=phylum_colors[-c(7)]) + theme(legend.position="none")

c = processedSaliva$mode3 %>%
  mutate(Component_1 = -1*cp_saliva$Fac[[3]][,2], day=c(-14,0,2,5,9,14,21)) %>%
  ggplot(aes(x=day,y=Component_1)) +
  annotate(geom = "rect", xmin = 0, xmax = 14, ymin = -Inf, ymax = Inf, fill = "red", colour = "black", alpha=0.25) +
  geom_line() +
  geom_point() +
  xlab("Time point [days]") +
  ylab("Loading") +
  ylim(0,1) +
  theme(legend.position="none", text=element_text(size=16))

a

Salivary metabolomics

CP was applied to capture the dominant source of variation in the salivary metabolomics data block. The model selection procedure revealed that CORCONDIA scores were too low at two components. The commented code below performs the procedure, and is not rerun here due to computational limitations.

# assessment_metab = parafac4microbiome::assessModelQuality(processedMetabolomics$data, numRepetitions=10, numCores=10)

# assessment_metab$metrics$CORCONDIA %>% as_tibble() %>% mutate(index=1:nrow(.)) %>% pivot_longer(-index) %>% ggplot(aes(x=as.factor(name),y=value)) + geom_boxplot() + xlab("Number of components") + ylab("CORCONDIA") + theme(text=element_text(size=16))

# stability_metab = parafac4microbiome::assessModelStability(processedMetabolomics, maxNumComponents=2, numFolds=10, colourCols=colourCols, legendTitles=legendTitles, xLabels = xLabels, legendColNums=legendColNums, arrangeModes=arrangeModes, numCores=parallel::detectCores())
# stability_metab$modelPlots[[1]]
# stability_metab$modelPlots[[2]]

# cp_metabolomics = parafac4microbiome::parafac(processedMetabolomics$data, nfac=1, nstart=100)
cp_metabolomics = readRDS("./TIFN2/cp_metabolomics.RDS")

cp_metabolomics$varExp

## [1] 15.44305

Therefore, a one-component CP model was selected, explaining 15.4% of the variation. However, MLR analysis revealed that the CP component captured uninterpretable variation.

testMetadata(cp_metabolomics, 1, processedMetabolomics)

##             Term   Estimate                                         CI
## 1  plaquepercent   4.340779  -0.00177252492396623 – 0.0104540835526603
## 2    bomppercent   1.782998 -0.00195667074036764 – 0.00552266679650121
## 3 Area_delta_R30   3.973073  -0.00357131688284603 – 0.0115174629081315
## 4         gender -35.447349    -0.141829531868009 – 0.0709348329387957
## 5            age  -7.356789  -0.0168151275419762 – 0.00210155014536747
##     P_value  P_adjust
## 1 0.1581698 0.3954244
## 2 0.3394212 0.4242765
## 3 0.2920535 0.4242765
## 4 0.5028867 0.5028867
## 5 0.1232062 0.3954244

2025-08-31