Skip to contents
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)
library(ggpubr)
library(parafac4microbiome)
library(NPLStoolbox)
processedTongue = processDataCube(NPLStoolbox::Cornejo2025$Tongue_microbiome, sparsityThreshold = 0.5, considerGroups=TRUE, groupVariable="GenderID", CLR=TRUE, centerMode=1, scaleMode=2)
processedSaliva = processDataCube(NPLStoolbox::Cornejo2025$Salivary_microbiome, sparsityThreshold = 0.5, considerGroups=TRUE, groupVariable="GenderID", CLR=TRUE, centerMode=1, scaleMode=2)

processedCytokines = NPLStoolbox::Cornejo2025$Salivary_cytokines
processedCytokines$data = log(processedCytokines$data + 0.1300)
# Remove subject 1, 18, 19, and 27 due to being an outlier
processedCytokines$data = processedCytokines$data[-c(1,9,10,19),,]
processedCytokines$mode1 = processedCytokines$mode1[-c(1,9,10,19),]
processedCytokines$data = multiwayCenter(processedCytokines$data, 1)
processedCytokines$data = multiwayScale(processedCytokines$data, 2)

processedBiochemistry = NPLStoolbox::Cornejo2025$Salivary_biochemistry
processedBiochemistry$data = log(processedBiochemistry$data)
processedBiochemistry$data = multiwayCenter(processedBiochemistry$data, 1)
processedBiochemistry$data = multiwayScale(processedBiochemistry$data, 2)

processedHormones = NPLStoolbox::Cornejo2025$Circulatory_hormones
processedHormones$data = log(processedHormones$data)
processedHormones$data = multiwayCenter(processedHormones$data, 1)
processedHormones$data = multiwayScale(processedHormones$data, 2)
Y = as.numeric(as.factor(processedTongue$mode1$GenderID))
Ycnt = Y - mean(Y)
Ynorm_tongue = Ycnt / norm(Ycnt, "2")

Y = as.numeric(as.factor(processedSaliva$mode1$GenderID))
Ycnt = Y - mean(Y)
Ynorm_saliva = Ycnt / norm(Ycnt, "2")

Y = as.numeric(as.factor(processedCytokines$mode1$GenderID))
Ycnt = Y - mean(Y)
Ynorm_cytokine = Ycnt / norm(Ycnt, "2")

Y = as.numeric(as.factor(processedBiochemistry$mode1$GenderID))
Ycnt = Y - mean(Y)
Ynorm_biochem = Ycnt / norm(Ycnt, "2")

Y = as.numeric(as.factor(processedHormones$mode1$GenderID))
Ycnt = Y - mean(Y)
Ynorm_hormones = Ycnt / norm(Ycnt, "2")
CV_tongue = NPLStoolbox::ncrossreg(processedTongue$data, Ynorm_tongue, maxNumComponents = 5, cvFolds = 10)
CV_saliva = NPLStoolbox::ncrossreg(processedSaliva$data, Ynorm_saliva, maxNumComponents = 5, cvFolds = 10)
CV_cyto = NPLStoolbox::ncrossreg(processedCytokines$data, Ynorm_cytokine, maxNumComponents = 5, cvFolds = 10)
CV_bio = NPLStoolbox::ncrossreg(processedBiochemistry$data, Ynorm_biochem, maxNumComponents=5, cvFolds = 10)
CV_hormones = NPLStoolbox::ncrossreg(processedHormones$data, Ynorm_hormones, maxNumComponents=5, cvFolds = 10)

a=CV_tongue$varExp %>% pivot_longer(-numComponents) %>% ggplot(aes(x=as.factor(numComponents),y=value,col=as.factor(name),group=as.factor(name))) + geom_line() + geom_point() + xlab("Number of components") + ylab("Variance explained (%)") + theme(legend.position="none")
b=CV_tongue$RMSE %>% ggplot(aes(x=numComponents,y=RMSE)) + geom_line() + geom_point() + xlab("Number of components") + ylab("RMSECV")
ggarrange(a,b) # 1


a=CV_saliva$varExp %>% pivot_longer(-numComponents) %>% ggplot(aes(x=as.factor(numComponents),y=value,col=as.factor(name),group=as.factor(name))) + geom_line() + geom_point() + xlab("Number of components") + ylab("Variance explained (%)") + theme(legend.position="none")
b=CV_saliva$RMSE %>% ggplot(aes(x=numComponents,y=RMSE)) + geom_line() + geom_point() + xlab("Number of components") + ylab("RMSECV")
ggarrange(a,b) # 1


a=CV_cyto$varExp %>% pivot_longer(-numComponents) %>% ggplot(aes(x=as.factor(numComponents),y=value,col=as.factor(name),group=as.factor(name))) + geom_line() + geom_point() + xlab("Number of components") + ylab("Variance explained (%)") + theme(legend.position="none")
b=CV_cyto$RMSE %>% ggplot(aes(x=numComponents,y=RMSE)) + geom_line() + geom_point() + xlab("Number of components") + ylab("RMSECV")
ggarrange(a,b) # 1


a=CV_bio$varExp %>% pivot_longer(-numComponents) %>% ggplot(aes(x=as.factor(numComponents),y=value,col=as.factor(name),group=as.factor(name))) + geom_line() + geom_point() + xlab("Number of components") + ylab("Variance explained (%)") + theme(legend.position="none")
b=CV_bio$RMSE %>% ggplot(aes(x=numComponents,y=RMSE)) + geom_line() + geom_point() + xlab("Number of components") + ylab("RMSECV")
ggarrange(a,b) # 1


a=CV_hormones$varExp %>% pivot_longer(-numComponents) %>% ggplot(aes(x=as.factor(numComponents),y=value,col=as.factor(name),group=as.factor(name))) + geom_line() + geom_point() + xlab("Number of components") + ylab("Variance explained (%)") + theme(legend.position="none")
b=CV_bio$RMSE %>% ggplot(aes(x=numComponents,y=RMSE)) + geom_line() + geom_point() + xlab("Number of components") + ylab("RMSECV")
ggarrange(a,b) # 1 or 2

npls_tongue = NPLStoolbox::triPLS1(processedTongue$data, Ynorm_tongue, 1)
npls_saliva = NPLStoolbox::triPLS1(processedSaliva$data, Ynorm_saliva, 1)
npls_cyto = NPLStoolbox::triPLS1(processedCytokines$data, Ynorm_cytokine, 1)
npls_bio = NPLStoolbox::triPLS1(processedBiochemistry$data, Ynorm_biochem, 1)
npls_hormones = NPLStoolbox::triPLS1(processedHormones$data, Ynorm_hormones, 2)
testMetadata = function(model, comp, metadata){
  staticMeta = Cornejo2025$Subject_metadata
  
  dynamicMeta = rTensor::k_unfold(rTensor::as.tensor(Cornejo2025$Clinical_measurements$data), 2)@data %>% t() %>% as_tibble()
  colnames(dynamicMeta) = Cornejo2025$Clinical_measurements$mode2$V1
  dynamicMeta = dynamicMeta %>%
    mutate(subject = rep(as.numeric(Cornejo2025$Clinical_measurements$mode1$subject), each=nrow(Cornejo2025$Clinical_measurements$mode3))) %>%
    mutate(newTimepoint = rep(Cornejo2025$Clinical_measurements$mode3$newTimepoint, nrow(Cornejo2025$Clinical_measurements$mode1)))
  
  transformedSubjectLoadings_static = transformPARAFACloadings(model$Fac, 1)[,comp]
  transformedSubjectLoadings_dynamic = transformPARAFACloadings(model$Fac, 2, moreOutput=TRUE)$F[,comp]
  
  metadata_static = metadata$mode1 %>% mutate(subject = as.integer(subject)) %>% left_join(staticMeta) %>% mutate(GenderID = as.numeric(as.factor(GenderID)) - 1)
  metadata_dynamic = transformedSubjectLoadings_dynamic %>% as_tibble() %>% mutate(subject = rep(as.numeric(metadata$mode1$subject), each=nrow(metadata$mode3))) %>% mutate(newTimepoint = rep(metadata$mode3$newTimepoint, nrow(metadata$mode1))) %>% left_join(dynamicMeta)
  
  staticResult = lm(transformedSubjectLoadings_static ~ GenderID + Age, data = metadata_static)
  dynamicResult = lm(transformedSubjectLoadings_dynamic ~ BOP + CODS + XI, data = metadata_dynamic)

  # Extract coefficients and confidence intervals
  coef_estimates <- summary(staticResult)$coefficients
  conf_intervals <- confint(staticResult)
  
  # Remove intercept
  coef_estimates <- coef_estimates[rownames(coef_estimates) != "(Intercept)", ]
  conf_intervals <- conf_intervals[rownames(conf_intervals) != "(Intercept)", ]
  
  # Combine into a clean data frame
  summary_table <- data.frame(
    Term     = rownames(coef_estimates),
    Estimate = round(coef_estimates[, "Estimate"], 2),
    CI       = paste0(
                 round(conf_intervals[, 1], 2), " – ",
                 round(conf_intervals[, 2], 2)
               ),
    P_value  = signif(coef_estimates[, "Pr(>|t|)"], 3),
    row.names = NULL
  )
  
  # Extract coefficients and confidence intervals
  coef_estimates <- summary(dynamicResult)$coefficients
  conf_intervals <- confint(dynamicResult)
  
  # Remove intercept
  coef_estimates <- coef_estimates[rownames(coef_estimates) != "(Intercept)", ]
  conf_intervals <- conf_intervals[rownames(conf_intervals) != "(Intercept)", ]
  
  # Combine into a clean data frame
  summary_table2 <- data.frame(
    Term     = rownames(coef_estimates),
    Estimate = round(coef_estimates[, "Estimate"], 2),
    CI       = paste0(
                 round(conf_intervals[, 1], 2), " – ",
                 round(conf_intervals[, 2], 2)
               ),
    P_value  = signif(coef_estimates[, "Pr(>|t|)"], 3),
    row.names = NULL
  )
  
  result = rbind(summary_table, summary_table2)
  return(result)
}

testMetadata(npls_tongue, 1, processedTongue)
#> Warning: The `x` argument of `as_tibble.matrix()` must have unique column names if
#> `.name_repair` is omitted as of tibble 2.0.0.
#>  Using compatibility `.name_repair`.
#> This warning is displayed once every 8 hours.
#> Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
#> generated.
#> Joining with `by = join_by(subject, GenderID)`
#> Joining with `by = join_by(subject, newTimepoint)`
#>       Term Estimate            CI P_value
#> 1 GenderID     8.33  3.39 – 13.27 0.00156
#> 2      Age    -0.06  -0.58 – 0.46 0.82100
#> 3      BOP    -0.09 -0.17 – -0.01 0.03080
#> 4     CODS    -0.04  -0.11 – 0.04 0.34300
#> 5       XI     0.00  -0.09 – 0.09 0.93900
testMetadata(npls_saliva, 1, processedSaliva)
#> Joining with `by = join_by(subject, GenderID)`
#> Joining with `by = join_by(subject, newTimepoint)`
#>       Term Estimate           CI  P_value
#> 1 GenderID     8.58 4.54 – 12.62 0.000122
#> 2      Age    -0.02  -0.45 – 0.4 0.907000
#> 3      BOP     0.07     0 – 0.14 0.065800
#> 4     CODS     0.06 -0.01 – 0.13 0.099100
#> 5       XI    -0.02 -0.11 – 0.06 0.550000
testMetadata(npls_cyto, 1, processedCytokines)
#> Joining with `by = join_by(subject, GenderID)`
#> Joining with `by = join_by(subject, newTimepoint)`
#>       Term Estimate            CI  P_value
#> 1 GenderID     3.52  -0.46 – 7.51 8.01e-02
#> 2      Age    -0.19  -0.61 – 0.24 3.68e-01
#> 3      BOP     0.01  -0.05 – 0.08 6.75e-01
#> 4     CODS     0.00  -0.06 – 0.06 9.88e-01
#> 5       XI    -0.14 -0.21 – -0.08 4.71e-05
testMetadata(npls_bio, 1, processedBiochemistry)
#> Joining with `by = join_by(subject, GenderID)`
#> Joining with `by = join_by(subject, newTimepoint)`
#>       Term Estimate            CI P_value
#> 1 GenderID     1.51   0.17 – 2.85 0.02790
#> 2      Age     0.05   -0.09 – 0.2 0.44800
#> 3      BOP     0.00  -0.02 – 0.02 0.83100
#> 4     CODS    -0.01  -0.03 – 0.01 0.23200
#> 5       XI    -0.04 -0.06 – -0.01 0.00409
testMetadata(npls_hormones, 1, processedHormones)
#> Joining with `by = join_by(subject, GenderID)`
#> Joining with `by = join_by(subject, newTimepoint)`
#>       Term Estimate           CI  P_value
#> 1 GenderID     5.12   4.3 – 5.94 7.84e-15
#> 2      Age    -0.02 -0.11 – 0.07 6.53e-01
#> 3      BOP    -0.04    -0.07 – 0 3.05e-02
#> 4     CODS     0.00 -0.03 – 0.03 9.22e-01
#> 5       XI     0.02 -0.02 – 0.05 2.84e-01
colourCols = c("GenderID", "", "")
legendTitles = c("Gender identity", "", "")
xLabels = c("Subject index", "Feature index", "Time index")
legendColNums = c(2,5,0)
arrangeModes = c(FALSE, FALSE, FALSE)
continuousModes = c(FALSE,FALSE,TRUE)

parafac4microbiome::plotPARAFACmodel(npls_tongue$Fac, processedTongue, 1, colourCols, legendTitles, xLabels, legendColNums, arrangeModes, continuousModes,
  overallTitle = "Tongue microbiome")

parafac4microbiome::plotPARAFACmodel(npls_saliva$Fac, processedSaliva, 1, colourCols, legendTitles, xLabels, legendColNums, arrangeModes, continuousModes,
  overallTitle = "Salivary microbiome")

parafac4microbiome::plotPARAFACmodel(npls_cyto$Fac, processedCytokines, 1, colourCols, legendTitles, xLabels, legendColNums, arrangeModes, continuousModes, overallTitle = "Salivary cytokines")

parafac4microbiome::plotPARAFACmodel(npls_bio$Fac, processedBiochemistry, 1, colourCols, legendTitles, xLabels, legendColNums, arrangeModes, continuousModes, overallTitle = "Salivary biochemistry")

parafac4microbiome::plotPARAFACmodel(npls_hormones$Fac, processedHormones, 2, colourCols, legendTitles, xLabels, legendColNums, arrangeModes, continuousModes, overallTitle = "Circulatory hormones")