GOHTRANS • NPLStoolbox

Preamble

library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(tidyr)
library(parafac4microbiome)
library(NPLStoolbox)
library(CMTFtoolbox)
#> 
#> Attaching package: 'CMTFtoolbox'
#> The following object is masked from 'package:NPLStoolbox':
#> 
#>     npred
#> The following objects are masked from 'package:parafac4microbiome':
#> 
#>     fac_to_vect, reinflateFac, reinflateTensor, vect_to_fac
library(ggplot2)
library(ggpubr)
library(readr)
library(stringr)

ph_BOMP = read_delim("./GOHTRANS/GOH-TRANS_export_20240205.csv",
                     delim = ";", escape_double = FALSE, trim_ws = TRUE) %>% as_tibble()
#> Rows: 42 Columns: 347
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ";"
#> chr (104): Participant Status, Site Abbreviation, Participant Creation Date,...
#> dbl (243): Participant Id, pat_genderID, Age, Informed_consent#Yes, Informed...
#> 
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

df1 = ph_BOMP %>% select(`Participant Id`, starts_with("5.")) %>% mutate(subject = 1:42, numTeeth = `5.1|Number of teeth`, DMFT = `5.2|DMFT`, numBleedingSites = `5.3|Bleeding sites`, boppercent = `5.4|BOP%`, DPSI = `5.5|DPSI`, pH = `5.8|pH`) %>% select(subject, numTeeth, DMFT, numBleedingSites, boppercent, DPSI, pH)
df2 = ph_BOMP %>% select(`Participant Id`, starts_with("12.")) %>% mutate(subject = 1:42, numTeeth = `12.1|Number of teeth`, DMFT = `12.2|DMFT`, numBleedingSites = `12.3|Bleeding sites`, boppercent = `12.4|BOP%`, DPSI = `12.5|DPSI`, pH = `12.8|pH`) %>% select(subject, numTeeth, DMFT, numBleedingSites, boppercent, DPSI, pH)
df3 = ph_BOMP %>% select(`Participant Id`, starts_with("19.")) %>% mutate(subject = 1:42, numTeeth = `19.1|Number of teeth`, DMFT = `19.2|DMFT`, numBleedingSites = `19.3|Bleeding sites`, boppercent = `19.4|BOP%`, DPSI = `19.5|DPSI`, pH = `19.8|pH`) %>% select(subject, numTeeth, DMFT, numBleedingSites, boppercent, DPSI, pH)
df4 = ph_BOMP %>% select(`Participant Id`, starts_with("26.")) %>% mutate(subject = 1:42, numTeeth = `26.1|Number of teeth`, DMFT = `26.2|DMFT`, numBleedingSites = `26.3|Bleeding sites`, boppercent = `26.4|BOP%`, DPSI = `26.5|DPSI`, pH = `26.8|pH`) %>% select(subject, numTeeth, DMFT, numBleedingSites, boppercent, DPSI, pH)

otherMeta = rbind(df1, df2, df3, df4) %>% as_tibble() %>% mutate(newTimepoint = rep(c(0,3,6,12), each=42))
otherMeta = otherMeta %>% select(subject, newTimepoint, boppercent)
otherMeta[otherMeta$boppercent < 0 & !is.na(otherMeta$boppercent), "boppercent"] = NA

saliva_sampleMeta = read.csv("./GOHTRANS/sampleInfo_fixed.csv", sep=" ") %>% as_tibble() %>% select(subject, GenderID, Age, newTimepoint)

# CODS XI
df = read.csv("./GOHTRANS/CODS_XI.csv")
df = df[1:40,1:10] %>% as_tibble() %>% filter(Participant_code %in% saliva_sampleMeta$subject)

CODS = df %>% select(Participant_code, CODS_BASELINE, CODS_3_MONTHS, CODS_6_MONTHS, CODS_12_MONTHS) %>% pivot_longer(-Participant_code) %>% mutate(timepoint = NA)
CODS[CODS$name == "CODS_BASELINE", "timepoint"] = 0
CODS[CODS$name == "CODS_3_MONTHS", "timepoint"] = 3
CODS[CODS$name == "CODS_6_MONTHS", "timepoint"] = 6
CODS[CODS$name == "CODS_12_MONTHS", "timepoint"] = 12
CODS[CODS$value == -99, "value"] = NA
CODS = CODS %>% mutate(subject = Participant_code, newTimepoint = timepoint, CODS = value) %>% select(-Participant_code, -name, -value, -timepoint)

XI = df %>% select(Participant_code, XI_BASELINE, XI_3_MONTHS, XI_6_MONTHS, XI_12_MONTHS) %>% pivot_longer(-Participant_code) %>% mutate(timepoint = NA)
XI[XI$name == "XI_BASELINE", "timepoint"] = 0
XI[XI$name == "XI_3_MONTHS", "timepoint"] = 3
XI[XI$name == "XI_6_MONTHS", "timepoint"] = 6
XI[XI$name == "XI_12_MONTHS", "timepoint"] = 12
XI[XI$value == -99, "value"] = NA
XI = XI %>% mutate(subject = Participant_code, newTimepoint = timepoint, XI = value) %>% select(-Participant_code, -name, -value, -timepoint)

df = otherMeta %>% left_join(CODS) %>% left_join(XI) %>% select(subject, newTimepoint, boppercent, CODS, XI) %>% filter(newTimepoint != "NA")
#> Joining with `by = join_by(subject, newTimepoint)`
#> Joining with `by = join_by(subject, newTimepoint)`

meta = df %>% left_join(otherMeta) %>% left_join(saliva_sampleMeta) %>% filter(newTimepoint==12) %>% unique() %>% select(-newTimepoint)
#> Joining with `by = join_by(subject, newTimepoint, boppercent)`
#> Joining with `by = join_by(subject, newTimepoint)`
meta[meta < 0] <- NA

newBOP = residuals(lm(boppercent ~ GenderID + Age + CODS + XI, data=meta, na.action=na.exclude))
newCODS= residuals(lm(CODS ~ boppercent + GenderID + Age + XI, data=meta, na.action=na.exclude))
newXI = residuals(lm(XI ~ boppercent + GenderID + Age + CODS, data=meta, na.action=na.exclude))

meta = meta %>% mutate(boppercent = newBOP, CODS = newCODS, XI = newXI)

prepMetadata = function(model, metadata){
  longA = parafac4microbiome::transformPARAFACloadings(model$Fac, 2, moreOutput = TRUE)$F
  df = longA %>% as_tibble() %>% mutate(subject=as.integer(rep(metadata$mode1$subject, each=nrow(metadata$mode3))), newTimepoint = rep(metadata$mode3$newTimepoint, nrow(metadata$mode1))) %>% left_join(Cornejo2025$Blood_hormones) %>% left_join(Cornejo2025$Clinical_measurements)
  
  newTesto = residuals(lm(Free_testosterone_Vermeulen_pmol_L ~ Estradiol_pmol_ml + LH_U_L + SHBG_nmol_L + boppercent + pH + GenderID + Age, data=df, na.action=na.exclude))
  newEstra = residuals(lm(Estradiol_pmol_ml ~ Free_testosterone_Vermeulen_pmol_L + LH_U_L + SHBG_nmol_L + boppercent + pH + GenderID + Age, data=df, na.action=na.exclude))
  newLH = residuals(lm(LH_U_L ~ Free_testosterone_Vermeulen_pmol_L + Estradiol_pmol_ml + SHBG_nmol_L + boppercent + pH + GenderID + Age, data=df, na.action=na.exclude))
  newSHBG = residuals(lm(SHBG_nmol_L ~ Free_testosterone_Vermeulen_pmol_L + Estradiol_pmol_ml + LH_U_L + boppercent + pH + GenderID + Age, data=df, na.action=na.exclude))
  newBOP = residuals(lm(boppercent ~ Free_testosterone_Vermeulen_pmol_L + Estradiol_pmol_ml + LH_U_L + SHBG_nmol_L + pH + GenderID + Age, data=df, na.action=na.exclude))
  newPH = residuals(lm(pH ~ Free_testosterone_Vermeulen_pmol_L + Estradiol_pmol_ml + LH_U_L + SHBG_nmol_L + boppercent + GenderID + Age, data=df, na.action=na.exclude))
  
  df$Free_testosterone_Vermeulen_pmol_L = newTesto
  df$Estradiol_pmol_ml = newEstra
  df$LH_U_L = newLH
  df$SHBG_nmol_L = newSHBG
  df$boppercent = newBOP
  df$pH = newPH
  df$GenderID = as.numeric(as.factor(df$GenderID))
  
  return(df)
}

clinical_metadata = NPLStoolbox::Cornejo2025$Clinical_measurements$mode1 %>% mutate(BOP = NPLStoolbox::Cornejo2025$Clinical_measurements$data[,1,2], CODS = NPLStoolbox::Cornejo2025$Clinical_measurements$data[,2,2], XI = NPLStoolbox::Cornejo2025$Clinical_measurements$data[,3,2]) %>% left_join(NPLStoolbox::Cornejo2025$Subject_metadata %>% mutate(subject = as.character(subject)))
#> Joining with `by = join_by(subject, GenderID)`

testMetadata = function(model, comp, metadata){
  transformedSubjectLoadings = model$Fac[[1]][,comp]
  transformedSubjectLoadings = transformedSubjectLoadings / norm(transformedSubjectLoadings, "2")
  
  metadata = metadata$mode1 %>% left_join(clinical_metadata) %>% mutate(GenderID = as.numeric(as.factor(GenderID))-1)
  
  result = lm(transformedSubjectLoadings ~ GenderID + Age + BOP + CODS + XI, data=metadata)
  
  # Extract coefficients and confidence intervals
  coef_estimates <- summary(result)$coefficients
  conf_intervals <- confint(result)
  
  # Remove intercept
  coef_estimates <- coef_estimates[rownames(coef_estimates) != "(Intercept)", ]
  conf_intervals <- conf_intervals[rownames(conf_intervals) != "(Intercept)", ]
  
  # Combine into a clean data frame
  summary_table <- data.frame(
    Term     = rownames(coef_estimates),
    Estimate = coef_estimates[, "Estimate"] * 1e3,
    CI       = paste0(
                 conf_intervals[, 1], " – ",
                 conf_intervals[, 2]
               ),
    P_value  = coef_estimates[, "Pr(>|t|)"],
    P_adjust = p.adjust(coef_estimates[, "Pr(>|t|)"], "BH"),
    row.names = NULL
  )
  
  return(summary_table)
}

processedTongue = processDataCube(NPLStoolbox::Cornejo2025$Tongue_microbiome, sparsityThreshold = 0.5, considerGroups=TRUE, groupVariable="GenderID", CLR=TRUE, centerMode=1, scaleMode=2)
processedSaliva = processDataCube(NPLStoolbox::Cornejo2025$Salivary_microbiome, sparsityThreshold = 0.5, considerGroups=TRUE, groupVariable="GenderID", CLR=TRUE, centerMode=1, scaleMode=2)

processedCytokines = NPLStoolbox::Cornejo2025$Salivary_cytokines
processedCytokines$data = log(processedCytokines$data + 0.1300)
# Remove subject 1, 18, 19, and 27 due to being an outlier
processedCytokines$data = processedCytokines$data[-c(1,9,10,19),,]
processedCytokines$mode1 = processedCytokines$mode1[-c(1,9,10,19),]
processedCytokines$data = multiwayCenter(processedCytokines$data, 1)
processedCytokines$data = multiwayScale(processedCytokines$data, 2)

processedBiochemistry = NPLStoolbox::Cornejo2025$Salivary_biochemistry
processedBiochemistry$data = log(processedBiochemistry$data)
processedBiochemistry$data = multiwayCenter(processedBiochemistry$data, 1)
processedBiochemistry$data = multiwayScale(processedBiochemistry$data, 2)

processedHormones = NPLStoolbox::Cornejo2025$Circulatory_hormones
processedHormones$data = log(processedHormones$data)
processedHormones$data = multiwayCenter(processedHormones$data, 1)
processedHormones$data = multiwayScale(processedHormones$data, 2)

deltaT = NPLStoolbox::Cornejo2025$Circulatory_hormones$data[,1,2] - NPLStoolbox::Cornejo2025$Circulatory_hormones$data[,1,1]
outcome = processedHormones$mode1 %>% mutate(deltaT = deltaT) %>% filter(deltaT != "NA")

processedTongue = processDataCube(NPLStoolbox::Cornejo2025$Tongue_microbiome, sparsityThreshold = 0.5, considerGroups=TRUE, groupVariable="GenderID", CLR=TRUE, centerMode=FALSE, scaleMode=FALSE)
mask = processedTongue$mode1$subject %in% outcome$subject
processedTongue$mode1 = processedTongue$mode1[mask,]
processedTongue$data = processedTongue$data[mask,,]
processedTongue$data = multiwayCenter(processedTongue$data, 1)
processedTongue$data = multiwayScale(processedTongue$data, 2)

processedSaliva = processDataCube(NPLStoolbox::Cornejo2025$Salivary_microbiome, sparsityThreshold = 0.5, considerGroups=TRUE, groupVariable="GenderID", CLR=TRUE, centerMode=1, scaleMode=2)
mask = processedSaliva$mode1$subject %in% outcome$subject
processedSaliva$mode1 = processedSaliva$mode1[mask,]
processedSaliva$data = processedSaliva$data[mask,,]
processedSaliva$data = multiwayCenter(processedSaliva$data, 1)
processedSaliva$data = multiwayScale(processedSaliva$data, 2)

processedCytokines = NPLStoolbox::Cornejo2025$Salivary_cytokines
processedCytokines$data = log(processedCytokines$data + 0.1300)
# Remove subject 1, 18, 19, and 27 due to being an outlier
processedCytokines$data = processedCytokines$data[-c(1,9,10,19),,]
processedCytokines$mode1 = processedCytokines$mode1[-c(1,9,10,19),]
mask = processedCytokines$mode1$subject %in% outcome$subject
processedCytokines$mode1 = processedCytokines$mode1[mask,]
processedCytokines$data = processedCytokines$data[mask,,]
processedCytokines$data = multiwayCenter(processedCytokines$data, 1)
processedCytokines$data = multiwayScale(processedCytokines$data, 2)

processedBiochemistry = NPLStoolbox::Cornejo2025$Salivary_biochemistry
processedBiochemistry$data = log(processedBiochemistry$data)
mask = processedBiochemistry$mode1$subject %in% outcome$subject
processedBiochemistry$mode1 = processedBiochemistry$mode1[mask,]
processedBiochemistry$data = processedBiochemistry$data[mask,,]
processedBiochemistry$data = multiwayCenter(processedBiochemistry$data, 1)
processedBiochemistry$data = multiwayScale(processedBiochemistry$data, 2)

Y = processedTongue$mode1 %>% left_join(outcome) %>% select(deltaT) %>% pull()
#> Joining with `by = join_by(subject, GenderID)`
Ycnt = Y - mean(Y)
Ynorm_tongue = Ycnt / norm(Ycnt, "2")

Y = processedSaliva$mode1 %>% left_join(outcome) %>% select(deltaT) %>% pull()
#> Joining with `by = join_by(subject, GenderID)`
Ycnt = Y - mean(Y)
Ynorm_saliva = Ycnt / norm(Ycnt, "2")

Y = processedCytokines$mode1 %>% left_join(outcome) %>% select(deltaT) %>% pull()
#> Joining with `by = join_by(subject, GenderID)`
Ycnt = Y - mean(Y)
Ynorm_cytokine = Ycnt / norm(Ycnt, "2")

Y = processedBiochemistry$mode1 %>% left_join(outcome) %>% select(deltaT) %>% pull()
#> Joining with `by = join_by(subject, GenderID)`
Ycnt = Y - mean(Y)
Ynorm_biochem = Ycnt / norm(Ycnt, "2")

Model selection

# CV_tongue = NPLStoolbox::ncrossreg(processedTongue$data, Ynorm_tongue, maxNumComponents = 10)
# CV_saliva = NPLStoolbox::ncrossreg(processedSaliva$data, Ynorm_saliva, maxNumComponents = 10)
# CV_cyto = NPLStoolbox::ncrossreg(processedCytokines$data, Ynorm_cytokine, maxNumComponents = 10)
# CV_bio = NPLStoolbox::ncrossreg(processedBiochemistry$data, Ynorm_biochem, maxNumComponents=10)
# 
# saveRDS(CV_tongue, "./NPLS_CV_tongue_deltaT.RDS")
# saveRDS(CV_saliva, "./NPLS_CV_saliva_deltaT.RDS")
# saveRDS(CV_cyto, "./NPLS_CV_cyto_deltaT.RDS")
# saveRDS(CV_bio, "./NPLS_CV_bio_deltaT.RDS")
# 
# CV_tongue = readRDS("./NPLS_CV_tongue_deltaT.RDS")
# CV_saliva = readRDS("./NPLS_CV_saliva_deltaT.RDS")
# CV_cyto = readRDS("./NPLS_CV_cyto_deltaT.RDS")
# CV_bio = readRDS("./NPLS_CV_bio_deltaT.RDS")
# 
# a=CV_tongue$varExp %>% pivot_longer(-numComponents) %>% ggplot(aes(x=as.factor(numComponents),y=value,col=as.factor(name),group=as.factor(name))) + geom_line() + geom_point() + xlab("Number of components") + ylab("Variance explained (%)") + theme(legend.position="none")
# b=CV_tongue$RMSE %>% ggplot(aes(x=numComponents,y=RMSE)) + geom_line() + geom_point() + xlab("Number of components") + ylab("RMSECV")
# ggarrange(a,b) # 1
# 
# a=CV_saliva$varExp %>% pivot_longer(-numComponents) %>% ggplot(aes(x=as.factor(numComponents),y=value,col=as.factor(name),group=as.factor(name))) + geom_line() + geom_point() + xlab("Number of components") + ylab("Variance explained (%)") + theme(legend.position="none")
# b=CV_saliva$RMSE %>% ggplot(aes(x=numComponents,y=RMSE)) + geom_line() + geom_point() + xlab("Number of components") + ylab("RMSECV")
# ggarrange(a,b) # 1
# 
# a=CV_cyto$varExp %>% pivot_longer(-numComponents) %>% ggplot(aes(x=as.factor(numComponents),y=value,col=as.factor(name),group=as.factor(name))) + geom_line() + geom_point() + xlab("Number of components") + ylab("Variance explained (%)") + theme(legend.position="none")
# b=CV_cyto$RMSE %>% ggplot(aes(x=numComponents,y=RMSE)) + geom_line() + geom_point() + xlab("Number of components") + ylab("RMSECV")
# ggarrange(a,b) # 1
# 
# a=CV_bio$varExp %>% pivot_longer(-numComponents) %>% ggplot(aes(x=as.factor(numComponents),y=value,col=as.factor(name),group=as.factor(name))) + geom_line() + geom_point() + xlab("Number of components") + ylab("Variance explained (%)") + theme(legend.position="none")
# b=CV_bio$RMSE %>% ggplot(aes(x=numComponents,y=RMSE)) + geom_line() + geom_point() + xlab("Number of components") + ylab("RMSECV")
# ggarrange(a,b) # 1

# npls_tongue = NPLStoolbox::triPLS1(processedTongue$data, Ynorm_tongue, 1)
# npls_saliva = NPLStoolbox::triPLS1(processedSaliva$data, Ynorm_saliva, 1)
# npls_cyto = NPLStoolbox::triPLS1(processedCytokines$data, Ynorm_cytokine, 1)
# npls_bio = NPLStoolbox::triPLS1(processedBiochemistry$data, Ynorm_biochem, 1)
# 
# saveRDS(npls_tongue, "./NPLS_tongue_deltaT.RDS")
# saveRDS(npls_saliva, "./NPLS_saliva_deltaT.RDS")
# saveRDS(npls_cyto, "./NPLS_cyto_deltaT.RDS")
# saveRDS(npls_bio, "./NPLS_bio_deltaT.RDS")

npls_tongue = readRDS("./GOHTRANS/NPLS_tongue_deltaT.RDS")
npls_saliva = readRDS("./GOHTRANS/NPLS_saliva_deltaT.RDS")
npls_cyto = readRDS("./GOHTRANS/NPLS_cyto_deltaT.RDS")
npls_bio = readRDS("./GOHTRANS/NPLS_bio_deltaT.RDS")

Tongue microbiome

testMetadata(npls_tongue, 1, processedTongue)
#> Joining with `by = join_by(subject, GenderID)`
#>       Term     Estimate                                        CI    P_value
#> 1 GenderID -146.0032589  -0.266023951164381 – -0.0259825666217627 0.01911011
#> 2      Age   -0.1299597  -0.0135672435837477 – 0.0133073242724576 0.98426614
#> 3      BOP    3.0539354 -0.00483738995653187 – 0.0109452608045115 0.43293306
#> 4     CODS    2.7772659  -0.0106562762659828 – 0.0162108080786144 0.67390326
#> 5       XI    3.3350268  -0.0105643068932559 – 0.0172343605614402 0.62550130
#>     P_adjust
#> 1 0.09555055
#> 2 0.98426614
#> 3 0.84237908
#> 4 0.84237908
#> 5 0.84237908

a = processedTongue$mode1 %>%
  mutate(comp=npls_tongue$Fac[[1]][,1]) %>%
  ggplot(aes(x=as.factor(GenderID),y=comp)) +
  geom_boxplot() +
  stat_compare_means(comparisons=list(c("TM","TW")), label="p.signif") +
  xlab("Gender identity") +
  ylab("Loading") +
  theme(text=element_text(size=16))

temp = processedTongue$mode2 %>%
  as_tibble() %>%
  mutate(Component_1 = npls_tongue$Fac[[2]][,1]) %>%
  mutate(Genus = gsub("g__", "", Genus), Species = gsub("s__", "", Species)) %>%
  mutate(Species = gsub("\\(", "", Species)) %>%
  mutate(Species = gsub("\\)", "", Species))

fixedSpeciesNames = str_split_fixed(temp$Species, "/", 5) %>% 
  as_tibble() %>%
  mutate(dplyr::across(.cols = everything(), .fns = ~ dplyr::if_else(stringr::str_detect(.x, "sp."), "", .x)))
#> Warning: The `x` argument of `as_tibble.matrix()` must have unique column names if
#> `.name_repair` is omitted as of tibble 2.0.0.
#> ℹ Using compatibility `.name_repair`.
#> This warning is displayed once every 8 hours.
#> Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
#> generated.
fixedSpeciesNames[fixedSpeciesNames$V1 == "", "V1"] = "sp."
fixedSpeciesNames = fixedSpeciesNames %>% 
  mutate(name=paste0(V1, "/", V2)) %>% 
  mutate(name=gsub("/$", "", name)) %>% 
  mutate(name=paste0(name, "/", V3)) %>% 
  mutate(name=gsub("/$", "", name)) %>% 
  mutate(name=paste0(name, "/", V4)) %>% 
  mutate(name=gsub("/$", "", name)) %>% 
  mutate(name=paste0(name, "/", V5)) %>% 
  mutate(name=gsub("/$", "", name))

temp$Species = fixedSpeciesNames$name
temp$name = paste0(temp$Genus, " ", temp$Species)
temp[temp$name == " sp.", "name"] = "Unclassified"
temp[temp$name == " ", "name"] = "Unclassified"
temp[temp$name == " sp.", "name"] = "Unclassified"

temp = temp %>%
  arrange(Component_1) %>%
  mutate(index=1:nrow(.)) %>%
  filter(index %in% c(1:10,213:222))

b=temp %>%
  ggplot(aes(x=Component_1,y=as.factor(index),fill=as.factor(Phylum))) +
  geom_bar(stat="identity", col="black") +
  scale_y_discrete(label=temp$name) +
  xlab("Loading") +
  ylab("") +
  guides(fill=guide_legend(title="Phylum")) +
  theme(text=element_text(size=16))

c = npls_tongue$Fac[[3]][,1] %>%
  as_tibble() %>%
  mutate(value=-1*value) %>%
  mutate(timepoint=c(0,3,6,12)) %>%
  ggplot(aes(x=timepoint,y=value)) +
  geom_line() +
  geom_point() +
  xlab("Time point [months]") +
  ylab("Loading") +
  ylim(0,1) +
  theme(text=element_text(size=16))

a

Salivary microbiome

testMetadata(npls_saliva, 1, processedSaliva)
#> Joining with `by = join_by(subject, GenderID)`
#>       Term     Estimate                                        CI    P_value
#> 1 GenderID -151.2510652  -0.279791282367381 – -0.0227108480443339 0.02294631
#> 2      Age    3.0405540  -0.0113505594011473 – 0.0174316674214563 0.66719488
#> 3      BOP    5.3079802 -0.00314350146723959 – 0.0137594618286484 0.20766265
#> 4     CODS   -3.7471649  -0.0181342709520459 – 0.0106399411508536 0.59641461
#> 5       XI   -0.1190497  -0.0150050109726752 – 0.0147669115500696 0.98698934
#>    P_adjust
#> 1 0.1147315
#> 2 0.8339936
#> 3 0.5191566
#> 4 0.8339936
#> 5 0.9869893

a = processedSaliva$mode1 %>%
  mutate(comp=npls_saliva$Fac[[1]][,1]) %>%
  ggplot(aes(x=as.factor(GenderID),y=comp)) +
  geom_boxplot() +
  stat_compare_means(comparisons=list(c("TM","TW")), label="p.signif") +
  xlab("Gender identity") +
  ylab("Loading") +
  theme(text=element_text(size=16))

temp = processedSaliva$mode2 %>%
  as_tibble() %>%
  mutate(Component_1 = -1*npls_saliva$Fac[[2]][,1]) %>%
  mutate(Genus = gsub("g__", "", Genus), Species = gsub("s__", "", Species)) %>%
  mutate(Species = gsub("\\(", "", Species)) %>%
  mutate(Species = gsub("\\)", "", Species))

fixedSpeciesNames = str_split_fixed(temp$Species, "/", 5) %>% 
  as_tibble() %>%
  mutate(dplyr::across(.cols = everything(), .fns = ~ dplyr::if_else(stringr::str_detect(.x, "sp."), "", .x)))
fixedSpeciesNames[fixedSpeciesNames$V1 == "", "V1"] = "sp."
fixedSpeciesNames = fixedSpeciesNames %>% 
  mutate(name=paste0(V1, "/", V2)) %>% 
  mutate(name=gsub("/$", "", name)) %>% 
  mutate(name=paste0(name, "/", V3)) %>% 
  mutate(name=gsub("/$", "", name)) %>% 
  mutate(name=paste0(name, "/", V4)) %>% 
  mutate(name=gsub("/$", "", name)) %>% 
  mutate(name=paste0(name, "/", V5)) %>% 
  mutate(name=gsub("/$", "", name))

temp$Species = fixedSpeciesNames$name
temp$name = paste0(temp$Genus, " ", temp$Species)
temp[temp$name == " sp.", "name"] = "Unclassified"
temp[temp$name == " ", "name"] = "Unclassified"
temp[temp$name == " sp.", "name"] = "Unclassified"

temp = temp %>%
  arrange(Component_1) %>%
  mutate(index=1:nrow(.)) %>%
  filter(index %in% c(1:10,251:260))

b=temp %>%
  ggplot(aes(x=Component_1,y=as.factor(index),fill=as.factor(Phylum))) +
  geom_bar(stat="identity", col="black") +
  scale_y_discrete(label=temp$name) +
  xlab("Loading") +
  ylab("") +
  guides(fill=guide_legend(title="Phylum")) +
  theme(text=element_text(size=16))

c = npls_saliva$Fac[[3]][,1] %>%
  as_tibble() %>%
  mutate(value=value) %>%
  mutate(timepoint=c(0,3,6,12)) %>%
  ggplot(aes(x=timepoint,y=value)) +
  geom_line() +
  geom_point() +
  xlab("Time point [months]") +
  ylab("Loading") +
  ylim(0,1)

a

# Salivary cytokines

testMetadata(npls_cyto, 1, processedCytokines)
#> Joining with `by = join_by(subject, GenderID)`
#>       Term    Estimate                                        CI    P_value
#> 1 GenderID -157.807812   -0.373026599093144 – 0.0574109754715683 0.13812064
#> 2      Age    7.156904  -0.0162848911715474 – 0.0305986991843004 0.52320055
#> 3      BOP   15.067426 -0.00127830792711726 – 0.0314131605144182 0.06807157
#> 4     CODS  -51.616492    -0.402993987873639 – 0.299761004737455 0.75736016
#> 5       XI    8.824141  -0.0164756264949713 – 0.0341239080914935 0.46680152
#>    P_adjust
#> 1 0.3453016
#> 2 0.6540007
#> 3 0.3403578
#> 4 0.7573602
#> 5 0.6540007

Salivary biochemistry

testMetadata(npls_bio, 1, processedBiochemistry)
#> Joining with `by = join_by(subject, GenderID)`
#>       Term    Estimate                                        CI    P_value
#> 1 GenderID -156.225906  -0.285656710827189 – -0.0267951010155874 0.01996409
#> 2      Age   -4.528260 -0.0190190823798637 – 0.00996256139587153 0.52570273
#> 3      BOP   -1.854863 -0.0103649001380471 – 0.00665517492477034 0.65737051
#> 4     CODS    2.516212   -0.0119705747472031 – 0.017002998779039 0.72355422
#> 5       XI    3.968734  -0.0110203642172829 – 0.0189578323409959 0.59037036
#>     P_adjust
#> 1 0.09982045
#> 2 0.72355422
#> 3 0.72355422
#> 4 0.72355422
#> 5 0.72355422

a = processedBiochemistry$mode1 %>%
  mutate(comp=npls_bio$Fac[[1]][,1]) %>%
  ggplot(aes(x=as.factor(GenderID),y=comp)) +
  geom_boxplot() +
  stat_compare_means(comparisons=list(c("TM","TW")), label="p.signif") +
  xlab("Gender identity") +
  ylab("Loading") +
  theme(text=element_text(size=16))

temp = processedBiochemistry$mode2 %>%
  mutate(Comp = -1*npls_bio$Fac[[2]][,1]) %>%
  arrange(Comp) %>%
  mutate(index=1:nrow(.)) %>%
  mutate(V1 = c("pH", "S-IgA", "Chitinase", "MUC-5B", "Amylase", "Total protein", "Lysozyme"))

b = temp %>%
  ggplot(aes(x=Comp,y=as.factor(index))) +
  geom_bar(stat="identity", col="black") +
  xlab("Loading") +
  ylab("") +
  scale_y_discrete(labels=temp$V1) +
  theme(text=element_text(size=16))

c = npls_bio$Fac[[3]][,1] %>%
  as_tibble() %>%
  mutate(value=value) %>%
  mutate(timepoint=c(0,3,6,12)) %>% 
  ggplot(aes(x=timepoint,y=value)) +
  geom_line() +
  geom_point() +
  xlab("Time point [months]") +
  ylab("Loading") +
  ylim(0,1) +
  theme(text=element_text(size=16))

a