Skip to contents

Introduction

This article presents the CP results of the TIFN2 dataset discussed in Chapter 4 of my dissertation.

Preamble

## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## 
## Attaching package: 'CMTFtoolbox'
## The following object is masked from 'package:NPLStoolbox':
## 
##     npred
## The following objects are masked from 'package:parafac4microbiome':
## 
##     fac_to_vect, reinflateFac, reinflateTensor, vect_to_fac
rf_data = read.csv("../../data-raw/vanderPloeg2024/RFdata.csv")
colnames(rf_data) = c("subject", "id", "fotonr", "day", "group", "RFgroup", "MQH", "SPS(tm)", "Area_delta_R30", "Area_delta_Rmax", "Area_delta_R30_x_Rmax", "gingiva_mean_R_over_G", "gingiva_mean_R_over_G_upper_jaw", "gingiva_mean_R_over_G_lower_jaw")
rf_data = rf_data %>% as_tibble()

rf_data[rf_data$subject == "VSTPHZ", 1] = "VSTPH2"
rf_data[rf_data$subject == "D2VZH0", 1] = "DZVZH0"
rf_data[rf_data$subject == "DLODNN", 1] = "DLODDN"
rf_data[rf_data$subject == "O3VQFX", 1] = "O3VQFQ"
rf_data[rf_data$subject == "F80LGT", 1] = "F80LGF"
rf_data[rf_data$subject == "26QQR0", 1] = "26QQrO"

rf_data2 = read.csv("../../data-raw/vanderPloeg2024/red_fluorescence_data.csv") %>% as_tibble()
rf_data2 = rf_data2[,c(2,4,181:192)]
rf_data = rf_data %>% left_join(rf_data2)
## Joining with `by = join_by(id, day)`
rf = rf_data %>% select(subject, RFgroup) %>% unique()
age_gender = read.csv("../../data-raw/vanderPloeg2024/Ploeg_subjectMetadata.csv", sep=";")
age_gender = age_gender[2:nrow(age_gender),2:ncol(age_gender)]
age_gender = age_gender %>% as_tibble() %>% filter(onderzoeksgroep == 0) %>% select(naam, leeftijd, geslacht)
colnames(age_gender) = c("subject", "age", "gender")

# Correction for incorrect subject ids
age_gender[age_gender$subject == "VSTPHZ", 1] = "VSTPH2"
age_gender[age_gender$subject == "D2VZH0", 1] = "DZVZH0"
age_gender[age_gender$subject == "DLODNN", 1] = "DLODDN"
age_gender[age_gender$subject == "O3VQFX", 1] = "O3VQFQ"
age_gender[age_gender$subject == "F80LGT", 1] = "F80LGF"
age_gender[age_gender$subject == "26QQR0", 1] = "26QQrO"

age_gender = age_gender %>% arrange(subject)
mapping = c(-14,0,2,5,9,14,21)

testMetadata = function(model, comp, metadata){
  transformedSubjectLoadings = model$Fac[[1]][,comp]
  transformedSubjectLoadings = transformedSubjectLoadings / norm(transformedSubjectLoadings, "2")
  
  metadata = metadata$mode1 %>% left_join(rf_data %>% select(subject,day,Area_delta_R30,plaquepercent,bomppercent) %>% filter(day==14), by="subject")
  
  result = lm(transformedSubjectLoadings ~ plaquepercent + bomppercent + Area_delta_R30 + gender + age, data=metadata)
  
  # Extract coefficients and confidence intervals
  coef_estimates <- summary(result)$coefficients
  conf_intervals <- confint(result)
  
  # Remove intercept
  coef_estimates <- coef_estimates[rownames(coef_estimates) != "(Intercept)", ]
  conf_intervals <- conf_intervals[rownames(conf_intervals) != "(Intercept)", ]
  
  # Combine into a clean data frame
  summary_table <- data.frame(
    Term     = rownames(coef_estimates),
    Estimate = coef_estimates[, "Estimate"] * 1e3,
    CI       = paste0(
                 conf_intervals[, 1], " – ",
                 conf_intervals[, 2]
               ),
    P_value  = coef_estimates[, "Pr(>|t|)"],
    P_adjust = p.adjust(coef_estimates[, "Pr(>|t|)"], "BH"),
    row.names = NULL
  )
  
  return(summary_table)
}

testFeatures = function(model, metadata, componentNum, metadataVar){
  df = metadata$mode1 %>% left_join(rf_data %>% filter(day==14)) %>% left_join(age_gender)
  topIndices = metadata$mode2 %>% mutate(index=1:nrow(.), Comp = model$Fac[[2]][,componentNum]) %>% arrange(desc(Comp)) %>% head() %>% select(index) %>% pull()
  bottomIndices = metadata$mode2 %>% mutate(index=1:nrow(.), Comp = model$Fac[[2]][,componentNum]) %>% arrange(desc(Comp)) %>% tail() %>% select(index) %>% pull()
  
  timepoint = which(abs(model$Fac[[3]][,componentNum]) == max(abs(model$Fac[[3]][,componentNum])))
  
  Xhat = parafac4microbiome::reinflateTensor(model$Fac[[1]][,componentNum], model$Fac[[2]][,componentNum], model$Fac[[3]][,componentNum])
  y = df[metadataVar]
  
  print("Positive loadings:")
  print(cor(Xhat[,topIndices[1],timepoint], y))
  print("Negative loadings:")
  print(cor(Xhat[,bottomIndices[1],timepoint], y))
}

plotFeatures = function(mode2, model, componentNum, flip=FALSE){
  
  if(flip==TRUE){
    df = mode2 %>% mutate(Comp = -1*model$Fac[[2]][,componentNum]) %>% arrange(Comp) %>% filter(Species != "") %>% mutate(index=1:nrow(.), name = paste(Genus, Species))
  } else{
    df = mode2 %>% mutate(Comp = model$Fac[[2]][,componentNum]) %>% arrange(Comp) %>% filter(Species != "") %>% mutate(index=1:nrow(.), name = paste(Genus, Species))
  }
  df = rbind(df %>% head(n=10), df %>% tail(n=10))
  
  plot=df %>%
    ggplot(aes(x=Comp,y=as.factor(index),fill=as.factor(Phylum))) +
    geom_bar(stat="identity", col="black") +
    scale_y_discrete(label=df$name) +
    xlab("Loading") +
    ylab("") +
    guides(fill=guide_legend(title="Phylum")) +
    theme(text=element_text(size=16))
  
  return(plot)
}
phylum_colors = hue_pal()(7)
processedTongue = processDataCube(parafac4microbiome::vanderPloeg2024$tongue, sparsityThreshold=0.50, considerGroups=TRUE, groupVariable="RFgroup", CLR=TRUE, centerMode=1, scaleMode=2)
processedLowling = processDataCube(parafac4microbiome::vanderPloeg2024$lower_jaw_lingual, sparsityThreshold=0.50, considerGroups=TRUE, groupVariable="RFgroup", CLR=TRUE, centerMode=1, scaleMode=2)
processedLowinter = processDataCube(parafac4microbiome::vanderPloeg2024$lower_jaw_interproximal, sparsityThreshold=0.50, considerGroups=TRUE, groupVariable="RFgroup", CLR=TRUE, centerMode=1, scaleMode=2)
processedUpling = processDataCube(parafac4microbiome::vanderPloeg2024$upper_jaw_lingual, sparsityThreshold=0.50, considerGroups=TRUE, groupVariable="RFgroup", CLR=TRUE, centerMode=1, scaleMode=2)
processedUpinter = processDataCube(parafac4microbiome::vanderPloeg2024$upper_jaw_interproximal, sparsityThreshold=0.50, considerGroups=TRUE, groupVariable="RFgroup", CLR=TRUE, centerMode=1, scaleMode=2)
processedSaliva = processDataCube(parafac4microbiome::vanderPloeg2024$saliva, sparsityThreshold=0.50, considerGroups=TRUE, groupVariable="RFgroup", CLR=TRUE, centerMode=1, scaleMode=2)
processedMetabolomics = parafac4microbiome::vanderPloeg2024$metabolomics
colourCols = c("", "", "")
legendTitles = c("RF group", "Phylum", "")
xLabels = c("Subject index", "Feature index", "Time index")
legendColNums = c(3,5,0)
arrangeModes = c(TRUE, TRUE, FALSE)
continuousModes = c(FALSE,FALSE,TRUE)

Tongue microbiome

CP was applied to capture the dominant source of variation in the tongue microbiome data block. The model selection procedure revealed that CORCONDIA scores were inconclusive and loadings were unstable at three components. The commented code below performs the procedure, and is not rerun here due to computational limitations.

# assessment_tongue = parafac4microbiome::assessModelQuality(processedTongue$data, numRepetitions=10, numCores=10)

# assessment_tongue$metrics$CORCONDIA %>% as_tibble() %>% mutate(index=1:nrow(.)) %>% pivot_longer(-index) %>% ggplot(aes(x=as.factor(name),y=value)) + geom_boxplot() + xlab("Number of components") + ylab("CORCONDIA") + theme(text=element_text(size=16))

# stability_tongue = parafac4microbiome::assessModelStability(processedTongue, maxNumComponents=3, numFolds=10, colourCols=colourCols, legendTitles=legendTitles, xLabels = xLabels, legendColNums=legendColNums, arrangeModes=arrangeModes, numCores=parallel::detectCores())
# stability_tongue$modelPlots[[2]]
# stability_tongue$modelPlots[[3]]
# cp_tongue = parafac4microbiome::parafac(processedTongue$data, nfac=2, nstart=100)

cp_tongue = readRDS("./TIFN2/cp_tongue.RDS")
cp_tongue$varExp
## [1] 31.37847

Therefore, a two-component CP model was selected, explaining 31.4% of the variation. However, MLR analysis revealed that the first component captured variation associated with a mixture of plaque% and subject age, while the second component was not interpretable.

testMetadata(cp_tongue, 1, processedTongue)
##             Term  Estimate                                         CI
## 1  plaquepercent -7.085726 -0.0125436561188763 – -0.00162779504204587
## 2    bomppercent  1.189991 -0.00221993077742614 – 0.00459991246706438
## 3 Area_delta_R30  2.527468 -0.00434891510321956 – 0.00940385109507837
## 4         gender 11.109220    -0.0857143066863078 – 0.107932746588392
## 5            age 13.793423   0.00516939627811755 – 0.0224174495211815
##       P_value   P_adjust
## 1 0.012433782 0.03108445
## 2 0.483348754 0.60418594
## 3 0.460540316 0.60418594
## 4 0.817174114 0.81717411
## 5 0.002572981 0.01286491
testMetadata(cp_tongue, 2, processedTongue)
##             Term   Estimate                                         CI
## 1  plaquepercent  -0.877180 -0.00719453415388883 – 0.00544017410099945
## 2    bomppercent  -2.057600  -0.0060044593190253 – 0.00188925874240208
## 3 Area_delta_R30  -5.035993   -0.012995154144287 – 0.00292316758866404
## 4         gender -42.102114    -0.154171789877283 – 0.0699675615970461
## 5            age   2.246435  -0.00773555910142959 – 0.0122284290033156
##     P_value  P_adjust
## 1 0.7796915 0.7796915
## 2 0.2971467 0.7428669
## 3 0.2074012 0.7428669
## 4 0.4507719 0.7512864
## 5 0.6505852 0.7796915
testFeatures(cp_tongue, processedTongue, 1, "plaquepercent") # no flip
## Joining with `by = join_by(subject, RFgroup)`
## Joining with `by = join_by(subject, age, gender)`
## [1] "Positive loadings:"
##      plaquepercent
## [1,]     0.3063565
## [1] "Negative loadings:"
##      plaquepercent
## [1,]    -0.3063565
testFeatures(cp_tongue, processedTongue, 1, "age") # flip
## Joining with `by = join_by(subject, RFgroup)`
## Joining with `by = join_by(subject, age, gender)`
## [1] "Positive loadings:"
##             age
## [1,] -0.4538388
## [1] "Negative loadings:"
##            age
## [1,] 0.4538388
a = processedTongue$mode1 %>%
  mutate(Component_1 = cp_tongue$Fac[[1]][,1]) %>%
  left_join(rf_data %>% select(subject,day,Area_delta_R30,plaquepercent,bomppercent) %>% filter(day==14), by="subject") %>%
  ggplot(aes(x=plaquepercent,y=Component_1)) +
  geom_point() +
  xlab("Plaque%") +
  ylab("Loading") +
  stat_cor() +
  theme(text=element_text(size=16))

b = plotFeatures(processedTongue$mode2, cp_tongue, 1, flip=FALSE) + scale_fill_manual(values=phylum_colors[-c(3,7)]) + theme(legend.position="none")

c = processedTongue$mode3 %>%
  mutate(Component_1 = -1*cp_tongue$Fac[[3]][,1], day=c(-14,0,2,5,9,14,21)) %>%
  ggplot(aes(x=day,y=Component_1)) +
  annotate(geom = "rect", xmin = 0, xmax = 14, ymin = -Inf, ymax = Inf, fill = "red", colour = "black", alpha=0.25) +
  geom_line() +
  geom_point() +
  xlab("Time point [days]") +
  ylab("Loading") +
  ylim(0,1) +
  theme(legend.position="none", text=element_text(size=16))

a

b

c

Lower jaw lingual microbiome

CP was applied to capture the dominant source of variation in the lower jaw lingual microbiome data block. The model selection procedure revealed CORCONDIA scores were too low at two components. The commented code below performs the procedure, and is not rerun here due to computational limitations.

# assessment_lowling = parafac4microbiome::assessModelQuality(processedLowling$data, numRepetitions=10, numCores=10)

# assessment_lowling$metrics$CORCONDIA %>% as_tibble() %>% mutate(index=1:nrow(.)) %>% pivot_longer(-index) %>% ggplot(aes(x=as.factor(name),y=value)) + geom_boxplot() + xlab("Number of components") + ylab("CORCONDIA") + theme(text=element_text(size=16))

# stability_lowling = parafac4microbiome::assessModelStability(processedLowling, maxNumComponents=2, numFolds=10, colourCols=colourCols, legendTitles=legendTitles, xLabels = xLabels, legendColNums=legendColNums, arrangeModes=arrangeModes, numCores=parallel::detectCores())
# stability_lowling$modelPlots[[1]]
# stability_lowling$modelPlots[[2]]
# cp_lowling = parafac4microbiome::parafac(processedLowling$data, nfac=1, nstart=100)
cp_lowling = readRDS("./TIFN2/cp_lowling.RDS")

cp_lowling$varExp
## [1] 10.55114

Therefore, a one-component CP model was selected, explaining 10.6% of the variation. Surprisingly, MLR analysis revealed that the CP component captured variation exclusively associated with RF%.

testMetadata(cp_lowling, 1, processedLowling)
##             Term   Estimate                                          CI
## 1  plaquepercent  -4.047703 -0.00897953402999654 – 0.000884127338494313
## 2    bomppercent  -2.597078 -0.00567831104112229 – 0.000484155105742315
## 3 Area_delta_R30 -10.235675  -0.0164492308704168 – -0.00402211882627686
## 4         gender -83.222465    -0.170712996229106 – 0.00426806527981913
## 5            age  -2.544554    -0.0103372952848458 – 0.0052481867268659
##      P_value    P_adjust
## 1 0.10460587 0.130757338
## 2 0.09591003 0.130757338
## 3 0.00197737 0.009886849
## 4 0.06160528 0.130757338
## 5 0.51174413 0.511744134
testFeatures(cp_lowling, processedLowling, 1, "Area_delta_R30") # flip
## Joining with `by = join_by(subject, RFgroup)`
## Joining with `by = join_by(subject, age, gender)`
## [1] "Positive loadings:"
##      Area_delta_R30
## [1,]     -0.5298343
## [1] "Negative loadings:"
##      Area_delta_R30
## [1,]      0.5298343
a = processedLowling$mode1 %>%
  mutate(Component_1 = cp_lowling$Fac[[1]][,1]) %>%
  left_join(rf_data %>% select(subject,day,Area_delta_R30,plaquepercent,bomppercent) %>% filter(day==14), by="subject") %>%
  ggplot(aes(x=Area_delta_R30,y=Component_1)) +
  geom_point() +
  xlab("RF%") +
  ylab("Loading") +
  stat_cor() +
  theme(text=element_text(size=16))

b = plotFeatures(processedLowling$mode2, cp_lowling, 1, flip=TRUE) + scale_fill_manual(values=phylum_colors[-7]) + theme(legend.position="none")

c = processedLowling$mode3 %>%
  mutate(Component_1 = cp_lowling$Fac[[3]][,1], day=c(-14,0,2,5,9,14,21)) %>%
  ggplot(aes(x=day,y=Component_1)) +
  annotate(geom = "rect", xmin = 0, xmax = 14, ymin = -Inf, ymax = Inf, fill = "red", colour = "black", alpha=0.25) +
  geom_line() +
  geom_point() +
  xlab("Time point [days]") +
  ylab("Loading") +
  ylim(0,1) +
  theme(legend.position="none", text=element_text(size=16))

a

b

c

Positively loaded microbiota including Fusobacterium canifelinum / nucleatum, Aggregatibacter segnis, Capnocytophaga leadbetteri, Campylobacter concisus, Fusobacterium hwasookii / nucleatum / periodonticum, Cardiobacterium hominis, Porphyromonas pasteri, Corynebacterium matruchotii, Capnocytophaga granulosa, and Fusobacterium nucleatum were enriched in high responders and depleted in low responders. Conversely, negatively loaded microbiota including Campylobacter concisus, Rothia dentocariosa, Veillonella atypica / dispar, Atopobium parvulum, Prevotella melaninogenica, Veillonella dispar / parvula, Veillonella parvula / rogosae / tobetsuensis, Actinomyces naeslundii / oris / viscosus, Prevotella nanceiensis, and Kingella oralis were enriched in low responders and depleted in high responders. The time mode loadings of the component indicate that the inter-subject differences due to RF% were constant throughout the time series.

Lower jaw interproximal microbiome

CP was applied to capture the dominant source of variation in the lower jaw interproximal microbiome data block. The model selection procedure revealed that CORCONDIA scores were too low at three components. The commented code below performs the procedure, and is not rerun here due to computational limitations.

# assessment_lowinter = parafac4microbiome::assessModelQuality(processedLowinter$data, numRepetitions=10, numCores=10)

# assessment_lowinter$metrics$CORCONDIA %>% as_tibble() %>% mutate(index=1:nrow(.)) %>% pivot_longer(-index) %>% ggplot(aes(x=as.factor(name),y=value)) + geom_boxplot() + xlab("Number of components") + ylab("CORCONDIA") + theme(text=element_text(size=16))

# 
# stability_lowinter = parafac4microbiome::assessModelStability(processedLowinter, maxNumComponents=3, numFolds=10, colourCols=colourCols, legendTitles=legendTitles, xLabels = xLabels, legendColNums=legendColNums, arrangeModes=arrangeModes, numCores=parallel::detectCores())
# stability_lowinter$modelPlots[[2]]
# stability_lowinter$modelPlots[[3]]
# 
# cp_lowinter = parafac4microbiome::parafac(processedLowinter$data, nfac=2, nstart=100)
cp_lowinter = readRDS("./TIFN2/cp_lowinter.RDS")

cp_lowinter$varExp
## [1] 10.32969

Therefore, a two-component CP model was selected, explaining 10.3% of the variation. However, MLR analysis revealed that the first CP component captured uninterpretable variation, while the second component captured variation exclusively associated with BOMP%.

testMetadata(cp_lowinter, 1, processedLowinter)
##             Term    Estimate                                          CI
## 1  plaquepercent  -2.8231325  -0.00885260016583814 – 0.00320633515325942
## 2    bomppercent  -3.6808351 -0.00744783286106304 – 8.61626701053834e-05
## 3 Area_delta_R30  -0.6104507  -0.00820690677640243 – 0.00698600534263869
## 4         gender -24.7799778     -0.131742556135758 – 0.0821826004540417
## 5            age  -3.2188894   -0.0127459966584142 – 0.00630821784418125
##      P_value  P_adjust
## 1 0.34835493 0.8013102
## 2 0.05517984 0.2758992
## 3 0.87134707 0.8713471
## 4 0.64104814 0.8013102
## 5 0.49729155 0.8013102
testMetadata(cp_lowinter, 2, processedLowinter)
##             Term  Estimate                                           CI
## 1  plaquepercent  2.156444    -0.00382845328747255 – 0.0081413416846705
## 2    bomppercent -4.091747 -0.00783089859963512 – -0.000352594798941325
## 3 Area_delta_R30  7.257480   -0.000282823087328943 – 0.0147977821470906
## 4         gender 74.027343        -0.032144561763505 – 0.18019924770268
## 5            age  5.422361     -0.0040343211065686 – 0.0148790435409806
##      P_value  P_adjust
## 1 0.46935734 0.4693573
## 2 0.03287750 0.1468343
## 3 0.05873372 0.1468343
## 4 0.16576694 0.2762782
## 5 0.25228070 0.3153509
testFeatures(cp_lowinter, processedLowinter, 2, "bomppercent") # no flip
## Joining with `by = join_by(subject, RFgroup)`
## Joining with `by = join_by(subject, age, gender)`
## [1] "Positive loadings:"
##      bomppercent
## [1,]   0.2341633
## [1] "Negative loadings:"
##      bomppercent
## [1,]  -0.2341633
a = processedLowinter$mode1 %>%
  mutate(Component_1 = cp_lowinter$Fac[[1]][,2]) %>%
  left_join(rf_data %>% select(subject,day,Area_delta_R30,plaquepercent,bomppercent) %>% filter(day==14), by="subject") %>%
  ggplot(aes(x=bomppercent,y=Component_1)) +
  geom_point() +
  xlab("BOMP%") +
  ylab("Loading") +
  stat_cor() +
  theme(text=element_text(size=16))

b = plotFeatures(processedLowinter$mode2, cp_lowinter, 2, flip=FALSE) + scale_fill_manual(values=phylum_colors[-c(3)]) + theme(legend.position="none")

c = processedLowinter$mode3 %>%
  mutate(Component_1 = -1*cp_lowinter$Fac[[3]][,2], day=c(-14,0,2,5,9,14,21)) %>%
  ggplot(aes(x=day,y=Component_1)) +
  annotate(geom = "rect", xmin = 0, xmax = 14, ymin = -Inf, ymax = Inf, fill = "red", colour = "black", alpha=0.25) +
  geom_line() +
  geom_point() +
  xlab("Time point [days]") +
  ylab("Loading") +
  ylim(0,1) +
  theme(legend.position="none", text=element_text(size=16))

a

b

c

Upper jaw lingual microbiome

CP was applied to capture the dominant source of variation in the upper jaw lingual microbiome data block. The model selection procedure revealed that CORCONDIA scores were too low at three components. The commented code below performs the procedure, and is not rerun here due to computational limitations.

# assessment_upling = parafac4microbiome::assessModelQuality(processedUpling$data, numRepetitions=10, numCores=10)

# assessment_upling$metrics$CORCONDIA %>% as_tibble() %>% mutate(index=1:nrow(.)) %>% pivot_longer(-index) %>% ggplot(aes(x=as.factor(name),y=value)) + geom_boxplot() + xlab("Number of components") + ylab("CORCONDIA") + theme(text=element_text(size=16))

# stability_upling = parafac4microbiome::assessModelStability(processedUpling, maxNumComponents=3, numFolds=10, colourCols=colourCols, legendTitles=legendTitles, xLabels = xLabels, legendColNums=legendColNums, arrangeModes=arrangeModes, numCores=parallel::detectCores())
# stability_upling$modelPlots[[2]]
# stability_upling$modelPlots[[3]]
# 
# cp_upling = parafac4microbiome::parafac(processedUpling$data, nfac=2, nstart=100)
cp_upling = readRDS("./TIFN2/cp_upling.RDS")

cp_upling$varExp
## [1] 19.19163

Therefore, a two-component CP model was selected, explaining 19.2% of the variation. However, MLR analysis revealed that the first CP component captured uninterpretable variation, while the second component captured variation exclusively but weakly associated with RF%.

testMetadata(cp_upling, 1, processedUpling)
##             Term   Estimate                                          CI
## 1  plaquepercent  3.5957413  -0.00193167844376722 – 0.00912316112826477
## 2    bomppercent  2.5813448 -0.000871991211114323 – 0.00603468087307641
## 3 Area_delta_R30  7.9435166   0.000979584806484587 – 0.0149074483917542
## 4         gender 78.4426156     -0.0196136482063289 – 0.176498879470703
## 5            age -0.4196712  -0.00915349714410007 – 0.00831415476076494
##      P_value  P_adjust
## 1 0.19519152 0.2439894
## 2 0.13812418 0.2302070
## 3 0.02655809 0.1327905
## 4 0.11334141 0.2302070
## 5 0.92284676 0.9228468
testMetadata(cp_upling, 2, processedUpling)
##             Term    Estimate                                          CI
## 1  plaquepercent  -2.5436852   -0.0086495593078846 – 0.00356218888854485
## 2    bomppercent  -0.1702389   -0.0039849726780106 – 0.00364449492465157
## 3 Area_delta_R30  -8.1288343 -0.0158215539877252 – -0.000436114704840301
## 4         gender -85.1621897     -0.193480215958621 – 0.0231558365641216
## 5            age  -0.6466179   -0.0102944542293067 – 0.00900121851702191
##      P_value  P_adjust
## 1 0.40344530 0.6724088
## 2 0.92832935 0.9283294
## 3 0.03895533 0.1947767
## 4 0.11945334 0.2986333
## 5 0.89255185 0.9283294
testFeatures(cp_upling, processedUpling, 2, "Area_delta_R30") # flip
## Joining with `by = join_by(subject, RFgroup)`
## Joining with `by = join_by(subject, age, gender)`
## [1] "Positive loadings:"
##      Area_delta_R30
## [1,]     -0.3280193
## [1] "Negative loadings:"
##      Area_delta_R30
## [1,]      0.3280193
a = processedUpling$mode1 %>%
  mutate(Component_1 = cp_upling$Fac[[1]][,2]) %>%
  left_join(rf_data %>% select(subject,day,Area_delta_R30,plaquepercent,bomppercent) %>% filter(day==14), by="subject") %>%
  ggplot(aes(x=Area_delta_R30,y=Component_1)) +
  geom_point() +
  xlab("RF%") +
  ylab("Loading") +
  stat_cor() +
  theme(text=element_text(size=16))

b = plotFeatures(processedUpling$mode2, cp_upling, 2, flip=TRUE) + scale_fill_manual(values=phylum_colors[-c(3,7)]) + theme(legend.position="none")

c = processedUpling$mode3 %>%
  mutate(Component_1 = cp_upling$Fac[[3]][,2], day=c(-14,0,2,5,9,14,21)) %>%
  ggplot(aes(x=day,y=Component_1)) +
  annotate(geom = "rect", xmin = 0, xmax = 14, ymin = -Inf, ymax = Inf, fill = "red", colour = "black", alpha=0.25) +
  geom_line() +
  geom_point() +
  xlab("Time point [days]") +
  ylab("Loading") +
  ylim(0,1) +
  theme(legend.position="none", text=element_text(size=16))

a

b

c

Upper jaw interproximal microbiome

CP was applied to capture the dominant source of variation in the upper jaw interproximal microbiome data block. The model selection procedure revealed that CORCONDIA scores were inconclusive and loadings were unstable at three components. The commented code below performs the procedure, and is not rerun here due to computational limitations.

# assessment_upinter = parafac4microbiome::assessModelQuality(processedUpinter$data, numRepetitions=10, numCores=10)

# assessment_upinter$metrics$CORCONDIA %>% as_tibble() %>% mutate(index=1:nrow(.)) %>% pivot_longer(-index) %>% ggplot(aes(x=as.factor(name),y=value)) + geom_boxplot() + xlab("Number of components") + ylab("CORCONDIA") + theme(text=element_text(size=16))

# stability_upinter = parafac4microbiome::assessModelStability(processedUpinter, maxNumComponents=3, numFolds=10, colourCols=colourCols, legendTitles=legendTitles, xLabels = xLabels, legendColNums=legendColNums, arrangeModes=arrangeModes, numCores=parallel::detectCores())
# stability_upinter$modelPlots[[2]]
# stability_upinter$modelPlots[[3]]
# 
# cp_upinter = parafac4microbiome::parafac(processedUpinter$data, nfac=2, nstart=100)
cp_upinter = readRDS("./TIFN2/cp_upinter.RDS")

cp_upinter$varExp
## [1] 16.10052

Therefore, a two-component CP model was selected, explaining 16.1% of the variation. However, MLR analysis revealed that the first CP component captured variation associated with a mixture of BOMP% and subject gender, while the second component was not interpretable.

testMetadata(cp_upinter, 1, processedUpinter)
##             Term    Estimate                                         CI
## 1  plaquepercent   2.1326839 -0.00328755891087678 – 0.00755292666044199
## 2    bomppercent   3.7419573 0.000355581649636956 – 0.00712833294888875
## 3 Area_delta_R30   6.3702807  -0.000458619993549555 – 0.013199181483497
## 4         gender 114.7064044     0.0185514573648732 – 0.210861351396827
## 5            age  -0.6618349 -0.00922631147351948 – 0.00790264161581984
##      P_value   P_adjust
## 1 0.42980563 0.53725704
## 2 0.03130954 0.07827386
## 3 0.06654502 0.11090837
## 4 0.02076356 0.07827386
## 5 0.87624079 0.87624079
testMetadata(cp_upinter, 2, processedUpinter)
##             Term   Estimate                                         CI
## 1  plaquepercent -2.4316053 -0.00867251409160288 – 0.00380930341158905
## 2    bomppercent  2.7501722 -0.00114892641938689 – 0.00664927091740296
## 3 Area_delta_R30 -0.9668053  -0.0088296535710741 – 0.00689604288502859
## 4         gender 30.5288503    -0.0801846866985154 – 0.141242387353944
## 5            age -7.7000402  -0.0175612435880203 – 0.00216116322441363
##     P_value  P_adjust
## 1 0.4342843 0.7238072
## 2 0.1610410 0.4026024
## 3 0.8043410 0.8043410
## 4 0.5791848 0.7239810
## 5 0.1219174 0.4026024
testFeatures(cp_upinter, processedUpinter, 1, "bomppercent") # flip
## Joining with `by = join_by(subject, RFgroup)`
## Joining with `by = join_by(subject, age, gender)`
## [1] "Positive loadings:"
##      bomppercent
## [1,]  -0.4565796
## [1] "Negative loadings:"
##      bomppercent
## [1,]   0.4565796
testFeatures(cp_upinter, processedUpinter, 1, "gender") # flip
## Joining with `by = join_by(subject, RFgroup)`
## Joining with `by = join_by(subject, age, gender)`
## [1] "Positive loadings:"
##          gender
## [1,] -0.2166038
## [1] "Negative loadings:"
##         gender
## [1,] 0.2166038
a = processedUpinter$mode1 %>%
  mutate(Component_1 = cp_upinter$Fac[[1]][,1]) %>%
  left_join(rf_data %>% select(subject,day,Area_delta_R30,plaquepercent,bomppercent) %>% filter(day==14), by="subject") %>%
  ggplot(aes(x=bomppercent,y=Component_1)) +
  geom_point() +
  xlab("BOMP%") +
  ylab("Loading") +
  stat_cor() +
  theme(text=element_text(size=16))

b = plotFeatures(processedUpinter$mode2, cp_upinter, 1, flip=TRUE) + scale_fill_manual(values=phylum_colors[-c(3,7)]) + theme(legend.position="none")

c = processedUpinter$mode3 %>%
  mutate(Component_1 = -1*cp_upinter$Fac[[3]][,1], day=c(-14,0,2,5,9,14,21)) %>%
  ggplot(aes(x=day,y=Component_1)) +
  annotate(geom = "rect", xmin = 0, xmax = 14, ymin = -Inf, ymax = Inf, fill = "red", colour = "black", alpha=0.25) +
  geom_line() +
  geom_point() +
  xlab("Time point [days]") +
  ylab("Loading") +
  ylim(0,1) +
  theme(legend.position="none", text=element_text(size=16))

a

b

c

Salivary microbiome

CP was applied to capture the dominant source of variation in the salivary microbiome data block. The model selection procedure revealed that CORCONDIA scores were too low at three components. The commented code below performs the procedure, and is not rerun here due to computational limitations.

# assessment_saliva = parafac4microbiome::assessModelQuality(processedSaliva$data, numRepetitions=10, numCores=10)

# assessment_saliva$metrics$CORCONDIA %>% as_tibble() %>% mutate(index=1:nrow(.)) %>% pivot_longer(-index) %>% ggplot(aes(x=as.factor(name),y=value)) + geom_boxplot() + xlab("Number of components") + ylab("CORCONDIA") + theme(text=element_text(size=16))

# stability_saliva = parafac4microbiome::assessModelStability(processedSaliva, maxNumComponents=3, numFolds=10, colourCols=colourCols, legendTitles=legendTitles, xLabels = xLabels, legendColNums=legendColNums, arrangeModes=arrangeModes, numCores=parallel::detectCores())
# stability_saliva$modelPlots[[2]]
# stability_saliva$modelPlots[[3]]
# 
# cp_saliva = parafac4microbiome::parafac(processedSaliva$data, nfac=2, nstart=100)
cp_saliva = readRDS("./TIFN2/cp_saliva.RDS")

cp_saliva$varExp
## [1] 17.33499

Therefore, a two-component CP model was selected, explaining 17.3% of the variation. However, MLR analysis revealed that the first CP component captured uninterpretable variation, while the second component captured variation exclusively associated with subject age.

testMetadata(cp_saliva, 1, processedSaliva)
##             Term   Estimate                                         CI
## 1  plaquepercent   2.410738 -0.00355831149506644 – 0.00837978682976563
## 2    bomppercent  -2.504120 -0.00623337039896853 – 0.00122513045951894
## 3 Area_delta_R30  -5.519053  -0.0130393880948642 – 0.00200128290288186
## 4         gender -86.946657    -0.192837413136006 – 0.0189440987425446
## 5            age  -7.035609  -0.0164672494454233 – 0.00239603161672677
##     P_value  P_adjust
## 1 0.4178187 0.4178187
## 2 0.1815319 0.2269149
## 3 0.1452174 0.2269149
## 4 0.1044564 0.2269149
## 5 0.1389100 0.2269149
testMetadata(cp_saliva, 2, processedSaliva)
##             Term   Estimate                                         CI
## 1  plaquepercent   4.868327 -0.000666119508513777 – 0.0104027729980185
## 2    bomppercent   1.049513 -0.00240821269203614 – 0.00450723916772543
## 3 Area_delta_R30   5.576203  -0.00139658153889448 – 0.0125489871748319
## 4         gender  59.330741    -0.0388501724466451 – 0.157511653873589
## 5            age -11.908698 -0.0206536259300458 – -0.00316376911062791
##      P_value  P_adjust
## 1 0.08280442 0.1890952
## 2 0.54175458 0.5417546
## 3 0.11345712 0.1890952
## 4 0.22808875 0.2851109
## 5 0.00903208 0.0451604
testFeatures(cp_saliva, processedSaliva, 2, "age") # no flip
## Joining with `by = join_by(subject, RFgroup)`
## Joining with `by = join_by(subject, age, gender)`
## [1] "Positive loadings:"
##            age
## [1,] 0.3592963
## [1] "Negative loadings:"
##             age
## [1,] -0.3592963
a = processedSaliva$mode1 %>%
  mutate(Component_1 = cp_saliva$Fac[[1]][,2]) %>%
  left_join(rf_data %>% select(subject,day,Area_delta_R30,plaquepercent,bomppercent) %>% filter(day==14), by="subject") %>%
  ggplot(aes(x=age,y=Component_1)) +
  geom_point() +
  xlab("Age") +
  ylab("Loading") +
  stat_cor() +
  theme(text=element_text(size=16))

b = plotFeatures(processedSaliva$mode2, cp_saliva, 2, flip=FALSE) + scale_fill_manual(values=phylum_colors[-c(7)]) + theme(legend.position="none")

c = processedSaliva$mode3 %>%
  mutate(Component_1 = -1*cp_saliva$Fac[[3]][,2], day=c(-14,0,2,5,9,14,21)) %>%
  ggplot(aes(x=day,y=Component_1)) +
  annotate(geom = "rect", xmin = 0, xmax = 14, ymin = -Inf, ymax = Inf, fill = "red", colour = "black", alpha=0.25) +
  geom_line() +
  geom_point() +
  xlab("Time point [days]") +
  ylab("Loading") +
  ylim(0,1) +
  theme(legend.position="none", text=element_text(size=16))

a

b

c

Salivary metabolomics

CP was applied to capture the dominant source of variation in the salivary metabolomics data block. The model selection procedure revealed that CORCONDIA scores were too low at two components. The commented code below performs the procedure, and is not rerun here due to computational limitations.

# assessment_metab = parafac4microbiome::assessModelQuality(processedMetabolomics$data, numRepetitions=10, numCores=10)

# assessment_metab$metrics$CORCONDIA %>% as_tibble() %>% mutate(index=1:nrow(.)) %>% pivot_longer(-index) %>% ggplot(aes(x=as.factor(name),y=value)) + geom_boxplot() + xlab("Number of components") + ylab("CORCONDIA") + theme(text=element_text(size=16))

# stability_metab = parafac4microbiome::assessModelStability(processedMetabolomics, maxNumComponents=2, numFolds=10, colourCols=colourCols, legendTitles=legendTitles, xLabels = xLabels, legendColNums=legendColNums, arrangeModes=arrangeModes, numCores=parallel::detectCores())
# stability_metab$modelPlots[[1]]
# stability_metab$modelPlots[[2]]
# cp_metabolomics = parafac4microbiome::parafac(processedMetabolomics$data, nfac=1, nstart=100)
cp_metabolomics = readRDS("./TIFN2/cp_metabolomics.RDS")

cp_metabolomics$varExp
## [1] 15.44305

Therefore, a one-component CP model was selected, explaining 15.4% of the variation. However, MLR analysis revealed that the CP component captured uninterpretable variation.

testMetadata(cp_metabolomics, 1, processedMetabolomics)
##             Term   Estimate                                         CI
## 1  plaquepercent   4.340779  -0.00177252492396623 – 0.0104540835526603
## 2    bomppercent   1.782998 -0.00195667074036764 – 0.00552266679650121
## 3 Area_delta_R30   3.973073  -0.00357131688284603 – 0.0115174629081315
## 4         gender -35.447349    -0.141829531868009 – 0.0709348329387957
## 5            age  -7.356789  -0.0168151275419762 – 0.00210155014536747
##     P_value  P_adjust
## 1 0.1581698 0.3954244
## 2 0.3394212 0.4242765
## 3 0.2920535 0.4242765
## 4 0.5028867 0.5028867
## 5 0.1232062 0.3954244