Preamble
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
##
## Attaching package: 'CMTFtoolbox'
## The following object is masked from 'package:NPLStoolbox':
##
## npred
## The following objects are masked from 'package:parafac4microbiome':
##
## fac_to_vect, reinflateFac, reinflateTensor, vect_to_fac
ph_BOMP = read_delim("./GOHTRANS/GOH-TRANS_export_20240205.csv",
delim = ";", escape_double = FALSE, trim_ws = TRUE) %>% as_tibble()
## Rows: 42 Columns: 347
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ";"
## chr (104): Participant Status, Site Abbreviation, Participant Creation Date,...
## dbl (243): Participant Id, pat_genderID, Age, Informed_consent#Yes, Informed...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
df1 = ph_BOMP %>% select(`Participant Id`, starts_with("5.")) %>% mutate(subject = 1:42, numTeeth = `5.1|Number of teeth`, DMFT = `5.2|DMFT`, numBleedingSites = `5.3|Bleeding sites`, boppercent = `5.4|BOP%`, DPSI = `5.5|DPSI`, pH = `5.8|pH`) %>% select(subject, numTeeth, DMFT, numBleedingSites, boppercent, DPSI, pH)
df2 = ph_BOMP %>% select(`Participant Id`, starts_with("12.")) %>% mutate(subject = 1:42, numTeeth = `12.1|Number of teeth`, DMFT = `12.2|DMFT`, numBleedingSites = `12.3|Bleeding sites`, boppercent = `12.4|BOP%`, DPSI = `12.5|DPSI`, pH = `12.8|pH`) %>% select(subject, numTeeth, DMFT, numBleedingSites, boppercent, DPSI, pH)
df3 = ph_BOMP %>% select(`Participant Id`, starts_with("19.")) %>% mutate(subject = 1:42, numTeeth = `19.1|Number of teeth`, DMFT = `19.2|DMFT`, numBleedingSites = `19.3|Bleeding sites`, boppercent = `19.4|BOP%`, DPSI = `19.5|DPSI`, pH = `19.8|pH`) %>% select(subject, numTeeth, DMFT, numBleedingSites, boppercent, DPSI, pH)
df4 = ph_BOMP %>% select(`Participant Id`, starts_with("26.")) %>% mutate(subject = 1:42, numTeeth = `26.1|Number of teeth`, DMFT = `26.2|DMFT`, numBleedingSites = `26.3|Bleeding sites`, boppercent = `26.4|BOP%`, DPSI = `26.5|DPSI`, pH = `26.8|pH`) %>% select(subject, numTeeth, DMFT, numBleedingSites, boppercent, DPSI, pH)
otherMeta = rbind(df1, df2, df3, df4) %>% as_tibble() %>% mutate(newTimepoint = rep(c(0,3,6,12), each=42))
otherMeta = otherMeta %>% select(subject, newTimepoint, boppercent)
otherMeta[otherMeta$boppercent < 0 & !is.na(otherMeta$boppercent), "boppercent"] = NA
saliva_sampleMeta = read.csv("./GOHTRANS/sampleInfo_fixed.csv", sep=" ") %>% as_tibble() %>% select(subject, GenderID, Age, newTimepoint)
# CODS XI
df = read.csv("./GOHTRANS/CODS_XI.csv")
df = df[1:40,1:10] %>% as_tibble() %>% filter(Participant_code %in% saliva_sampleMeta$subject)
CODS = df %>% select(Participant_code, CODS_BASELINE, CODS_3_MONTHS, CODS_6_MONTHS, CODS_12_MONTHS) %>% pivot_longer(-Participant_code) %>% mutate(timepoint = NA)
CODS[CODS$name == "CODS_BASELINE", "timepoint"] = 0
CODS[CODS$name == "CODS_3_MONTHS", "timepoint"] = 3
CODS[CODS$name == "CODS_6_MONTHS", "timepoint"] = 6
CODS[CODS$name == "CODS_12_MONTHS", "timepoint"] = 12
CODS[CODS$value == -99, "value"] = NA
CODS = CODS %>% mutate(subject = Participant_code, newTimepoint = timepoint, CODS = value) %>% select(-Participant_code, -name, -value, -timepoint)
XI = df %>% select(Participant_code, XI_BASELINE, XI_3_MONTHS, XI_6_MONTHS, XI_12_MONTHS) %>% pivot_longer(-Participant_code) %>% mutate(timepoint = NA)
XI[XI$name == "XI_BASELINE", "timepoint"] = 0
XI[XI$name == "XI_3_MONTHS", "timepoint"] = 3
XI[XI$name == "XI_6_MONTHS", "timepoint"] = 6
XI[XI$name == "XI_12_MONTHS", "timepoint"] = 12
XI[XI$value == -99, "value"] = NA
XI = XI %>% mutate(subject = Participant_code, newTimepoint = timepoint, XI = value) %>% select(-Participant_code, -name, -value, -timepoint)
df = otherMeta %>% left_join(CODS) %>% left_join(XI) %>% select(subject, newTimepoint, boppercent, CODS, XI) %>% filter(newTimepoint != "NA")
## Joining with `by = join_by(subject, newTimepoint)`
## Joining with `by = join_by(subject, newTimepoint)`
## Joining with `by = join_by(subject, newTimepoint, boppercent)`
## Joining with `by = join_by(subject, newTimepoint)`
meta[meta < 0] <- NA
meta$GenderID = as.numeric(as.factor(meta$GenderID)) - 1
staticMeta = meta %>% filter(newTimepoint == 0) %>% select(subject, GenderID, Age)
dynamicMeta = meta %>% select(-GenderID, -Age)
#
# newBOP = residuals(lm(boppercent ~ GenderID + Age + CODS + XI, data=meta, na.action=na.exclude))
# newCODS= residuals(lm(CODS ~ boppercent + GenderID + Age + XI, data=meta, na.action=na.exclude))
# newXI = residuals(lm(XI ~ boppercent + GenderID + Age + CODS, data=meta, na.action=na.exclude))
#
# meta = meta %>% mutate(boppercent = newBOP, CODS = newCODS, XI = newXI)
## Joining with `by = join_by(subject, GenderID)`
testMetadata = function(model, comp, metadata){
transformedSubjectLoadings = model$Fac[[1]][,comp]
transformedSubjectLoadings = transformedSubjectLoadings / norm(transformedSubjectLoadings, "2")
metadata = metadata$mode1 %>% left_join(clinical_metadata) %>% mutate(GenderID = as.numeric(as.factor(GenderID))-1)
result = lm(transformedSubjectLoadings ~ GenderID + Age + BOP + CODS + XI, data=metadata)
# Extract coefficients and confidence intervals
coef_estimates <- summary(result)$coefficients
conf_intervals <- confint(result)
# Remove intercept
coef_estimates <- coef_estimates[rownames(coef_estimates) != "(Intercept)", ]
conf_intervals <- conf_intervals[rownames(conf_intervals) != "(Intercept)", ]
# Combine into a clean data frame
summary_table <- data.frame(
Term = rownames(coef_estimates),
Estimate = coef_estimates[, "Estimate"] * 1e3,
CI = paste0(
conf_intervals[, 1], " – ",
conf_intervals[, 2]
),
P_value = coef_estimates[, "Pr(>|t|)"],
P_adjust = p.adjust(coef_estimates[, "Pr(>|t|)"], "BH"),
row.names = NULL
)
return(summary_table)
}
Model selection
processedTongue = processDataCube(NPLStoolbox::Cornejo2025$Tongue_microbiome, sparsityThreshold = 0.5, considerGroups=TRUE, groupVariable="GenderID", CLR=TRUE, centerMode=1, scaleMode=2)
processedSaliva = processDataCube(NPLStoolbox::Cornejo2025$Salivary_microbiome, sparsityThreshold = 0.5, considerGroups=TRUE, groupVariable="GenderID", CLR=TRUE, centerMode=1, scaleMode=2)
processedCytokines = NPLStoolbox::Cornejo2025$Salivary_cytokines
processedCytokines$data = log(processedCytokines$data + 0.1300)
# Remove subject 1, 18, 19, and 27 due to being an outlier
processedCytokines$data = processedCytokines$data[-c(1,9,10,19),,]
processedCytokines$mode1 = processedCytokines$mode1[-c(1,9,10,19),]
processedCytokines$data = multiwayCenter(processedCytokines$data, 1)
processedCytokines$data = multiwayScale(processedCytokines$data, 2)
processedBiochemistry = NPLStoolbox::Cornejo2025$Salivary_biochemistry
processedBiochemistry$data = log(processedBiochemistry$data)
processedBiochemistry$data = multiwayCenter(processedBiochemistry$data, 1)
processedBiochemistry$data = multiwayScale(processedBiochemistry$data, 2)
# assessment_tongue = parafac4microbiome::assessModelQuality(processedTongue$data, numRepetitions=10, numCores=10)
# assessment_tongue$plots$overview # 1 or 2?
#
# assessment_saliva = parafac4microbiome::assessModelQuality(processedSaliva$data, numRepetitions=10, numCores=10)
# assessment_saliva$plots$overview # def 2
#
# assessment_cytokines = parafac4microbiome::assessModelQuality(processedCytokines$data, numRepetitions=10, numCores=10)
# assessment_cytokines$plots$overview # def 2
#
# assessment_biochemistry = parafac4microbiome::assessModelQuality(processedBiochemistry$data, numRepetitions=10, numCores=10)
# assessment_biochemistry$plots$overview # 2 or 3?
#
# saveRDS(assessment_tongue, "./CP_assessment_tongue.RDS")
# saveRDS(assessment_saliva, "./CP_assessment_saliva.RDS")
# saveRDS(assessment_cytokines, "./CP_assessment_cytokines.RDS")
# saveRDS(assessment_biochemistry, "./CP_assessment_biochemistry.RDS")
#
# assessment_tongue = readRDS("./CP_assessment_tongue.RDS")
# assessment_saliva = readRDS("./CP_assessment_saliva.RDS")
# assessment_cytokines = readRDS("./CP_assessment_cytokines.RDS")
# assessment_biochemistry = readRDS("./CP_assessment_biochemistry.RDS")
#
# a = assessment_tongue$metrics$CORCONDIA %>% as_tibble() %>% mutate(index=1:nrow(.)) %>% pivot_longer(-index) %>% ggplot(aes(x=as.factor(name),y=value)) + geom_boxplot() + xlab("Number of components") + ylab("CORCONDIA") + theme(text=element_text(size=16))
# b = assessment_saliva$metrics$CORCONDIA %>% as_tibble() %>% mutate(index=1:nrow(.)) %>% pivot_longer(-index) %>% ggplot(aes(x=as.factor(name),y=value)) + geom_boxplot() + xlab("Number of components") + ylab("CORCONDIA") + theme(text=element_text(size=16))
# c = assessment_cytokines$metrics$CORCONDIA %>% as_tibble() %>% mutate(index=1:nrow(.)) %>% pivot_longer(-index) %>% ggplot(aes(x=as.factor(name),y=value)) + geom_boxplot() + xlab("Number of components") + ylab("CORCONDIA") + theme(text=element_text(size=16))
# d = assessment_biochemistry$metrics$CORCONDIA %>% as_tibble() %>% mutate(index=1:nrow(.)) %>% pivot_longer(-index) %>% ggplot(aes(x=as.factor(name),y=value)) + geom_boxplot() + xlab("Number of components") + ylab("CORCONDIA") + theme(text=element_text(size=16))
# ggarrange(a,b,c,d)
# colourCols = c("", "", "")
# legendTitles = c("", "", "")
# xLabels = c("Subject index", "Feature index", "Time index")
# legendColNums = c(2,5,0)
# arrangeModes = c(FALSE, FALSE, FALSE)
# continuousModes = c(FALSE,FALSE,TRUE)
#
# stability_tongue = parafac4microbiome::assessModelStability(processedTongue, maxNumComponents=3, numFolds=10, colourCols=colourCols, legendTitles=legendTitles, xLabels = xLabels, legendColNums=legendColNums, arrangeModes=arrangeModes, numCores=parallel::detectCores())
# stability_tongue$modelPlots[[2]]
# stability_tongue$modelPlots[[3]]
#
# stability_saliva = parafac4microbiome::assessModelStability(processedSaliva, maxNumComponents=3, numFolds=10, colourCols=colourCols, legendTitles=legendTitles, xLabels = xLabels, legendColNums=legendColNums, arrangeModes=arrangeModes, numCores=parallel::detectCores())
# stability_saliva$modelPlots[[2]]
# stability_saliva$modelPlots[[2]]
#
# stability_cytokines = parafac4microbiome::assessModelStability(processedCytokines, maxNumComponents=3, numFolds=10, colourCols=colourCols, legendTitles=legendTitles, xLabels = xLabels, legendColNums=legendColNums, arrangeModes=arrangeModes, numCores=parallel::detectCores())
# stability_cytokines$modelPlots[[2]]
# stability_cytokines$modelPlots[[3]]
#
# stability_biochemistry = parafac4microbiome::assessModelStability(processedBiochemistry, maxNumComponents=3, numFolds=10, colourCols=colourCols, legendTitles=legendTitles, xLabels = xLabels, legendColNums=legendColNums, arrangeModes=arrangeModes, numCores=parallel::detectCores())
# stability_biochemistry$modelPlots[[2]]
# stability_biochemistry$modelPlots[[3]]
# cp_tongue = parafac4microbiome::parafac(processedTongue$data, nfac=2, nstart=100)
# cp_saliva = parafac4microbiome::parafac(processedSaliva$data, nfac=2, nstart=100)
# cp_cytokines = parafac4microbiome::parafac(processedCytokines$data, nfac=2, nstart=100)
# cp_biochemistry = parafac4microbiome::parafac(processedBiochemistry$data, nfac=2, nstart=100)
#
# saveRDS(cp_tongue, "./cp_tongue.RDS")
# saveRDS(cp_saliva, "./cp_saliva.RDS")
# saveRDS(cp_cytokines, "./cp_cytokines.RDS")
# saveRDS(cp_biochemistry, "./cp_biochemistry.RDS")
cp_tongue = readRDS("./GOHTRANS/cp_tongue.RDS")
cp_saliva = readRDS("./GOHTRANS/cp_saliva.RDS")
cp_cytokines = readRDS("./GOHTRANS/cp_cytokines.RDS")
cp_biochemistry = readRDS("./GOHTRANS/cp_biochemistry.RDS")
Tongue microbiome
testMetadata(cp_tongue, 1, processedTongue)
## Joining with `by = join_by(subject, GenderID)`
## Term Estimate CI P_value
## 1 GenderID 121.4677838 -2.46115922752504e-05 – 0.242960179217603 0.05004334
## 2 Age 1.2397817 -0.0120682478356951 – 0.014547811241472 0.85003536
## 3 BOP 2.4968042 -0.00497023980355535 – 0.00996384822005768 0.49901353
## 4 CODS 0.5381136 -0.0119608431733796 – 0.0130370704610451 0.93035385
## 5 XI -4.9945589 -0.0188281123330526 – 0.0088389945248416 0.46571634
## P_adjust
## 1 0.2502167
## 2 0.9303539
## 3 0.8316892
## 4 0.9303539
## 5 0.8316892
testMetadata(cp_tongue, 2, processedTongue)
## Joining with `by = join_by(subject, GenderID)`
## Term Estimate CI P_value
## 1 GenderID 59.914254 -0.0657307291997982 – 0.185559237353633 0.3370374
## 2 Age -1.942659 -0.0157055547933125 – 0.0118202363189334 0.7746069
## 3 BOP -1.262477 -0.00898474294192472 – 0.00645978948107809 0.7402082
## 4 CODS -4.893111 -0.0178192803655215 – 0.0080330573884708 0.4445972
## 5 XI -6.251158 -0.0205575397778675 – 0.00805522373616812 0.3783908
## P_adjust
## 1 0.7409954
## 2 0.7746069
## 3 0.7746069
## 4 0.7409954
## 5 0.7409954
Salivary microbiome
testMetadata(cp_saliva, 1, processedSaliva)
## Joining with `by = join_by(subject, GenderID)`
## Term Estimate CI P_value
## 1 GenderID -45.0221163 -0.178052895480722 – 0.0880086629409456 0.4938634
## 2 Age 3.0529562 -0.0115189644032717 – 0.0176248767956416 0.6710904
## 3 BOP -1.6631517 -0.00983935629840586 – 0.00651305291998757 0.6800939
## 4 CODS -0.9060191 -0.0145920276329919 – 0.0127799895072467 0.8931035
## 5 XI 1.7175717 -0.0134297828369515 – 0.0168649263219845 0.8180171
## P_adjust
## 1 0.8931035
## 2 0.8931035
## 3 0.8931035
## 4 0.8931035
## 5 0.8931035
testMetadata(cp_saliva, 2, processedSaliva)
## Joining with `by = join_by(subject, GenderID)`
## Term Estimate CI P_value
## 1 GenderID 118.0788404 -0.00224170005304913 – 0.238399380941789 0.05412757
## 2 Age 6.5090792 -0.00667058769451647 – 0.0196887461831477 0.32036701
## 3 BOP 3.0035619 -0.00439145876278687 – 0.0103985825138551 0.41245932
## 4 CODS 0.4777822 -0.0119006159669152 – 0.0128561802899763 0.93754365
## 5 XI 0.3906881 -0.0133094337962947 – 0.0140908099806615 0.95383322
## P_adjust
## 1 0.2706379
## 2 0.6874322
## 3 0.6874322
## 4 0.9538332
## 5 0.9538332
Salivary cytokines
testMetadata(cp_cytokines, 1, processedCytokines)
## Joining with `by = join_by(subject, GenderID)`
## Term Estimate CI P_value
## 1 GenderID -27.05017814 -0.251532298438523 – 0.197431942164215 0.8007915
## 2 Age 16.67084513 -0.00723963736661261 – 0.0405813276341731 0.1579702
## 3 BOP -0.09082505 -0.0164640352183346 – 0.0162823851145396 0.9907222
## 4 CODS -28.95668163 -0.402598657967525 – 0.34468529471301 0.8710047
## 5 XI -15.99762836 -0.0429168315278513 – 0.0109215747988014 0.2245843
## P_adjust
## 1 0.9907222
## 2 0.5614609
## 3 0.9907222
## 4 0.9907222
## 5 0.5614609
testMetadata(cp_cytokines, 2, processedCytokines)
## Joining with `by = join_by(subject, GenderID)`
## Term Estimate CI P_value
## 1 GenderID 162.363038 -0.0550666388070594 – 0.379792714707578 0.1323173
## 2 Age -6.714968 -0.029874266224001 – 0.0164443312143381 0.5458402
## 3 BOP -10.075196 -0.0259340174258618 – 0.00578362484505684 0.1957476
## 4 CODS 95.954957 -0.265948494090199 – 0.457858408923401 0.5803389
## 5 XI -1.713409 -0.0277869044719951 – 0.02436008731968 0.8904703
## P_adjust
## 1 0.4893690
## 2 0.7254237
## 3 0.4893690
## 4 0.7254237
## 5 0.8904703
Salivary biochemistry
testMetadata(cp_biochemistry, 1, processedBiochemistry)
## Joining with `by = join_by(subject, GenderID)`
## Term Estimate CI P_value
## 1 GenderID 81.395726 -0.0343343553682499 – 0.197125807465893 0.16076105
## 2 Age 3.177143 -0.00949969477808753 – 0.0158539800238245 0.61171034
## 3 BOP 6.688526 -0.000424360420959855 – 0.0138014124523289 0.06428731
## 4 CODS 4.938192 -0.00696794674040549 – 0.0168443303204457 0.40275497
## 5 XI 8.405007 -0.00477242944168497 – 0.0215824425531161 0.20199880
## P_adjust
## 1 0.3366647
## 2 0.6117103
## 3 0.3214366
## 4 0.5034437
## 5 0.3366647
testMetadata(cp_biochemistry, 2, processedBiochemistry)
## Joining with `by = join_by(subject, GenderID)`
## Term Estimate CI P_value
## 1 GenderID -1.5487722 -0.106748335420374 – 0.103650791078514 0.9761557
## 2 Age -0.3542818 -0.0118776277377812 – 0.0111690640408018 0.9502317
## 3 BOP -4.8835426 -0.0113492128200526 – 0.00158212771674754 0.1330520
## 4 CODS -5.6334319 -0.0164562063991478 – 0.00518934258952479 0.2954278
## 5 XI -6.9012715 -0.018879665442852 – 0.00507712253655297 0.2478597
## P_adjust
## 1 0.9761557
## 2 0.9761557
## 3 0.4923796
## 4 0.4923796
## 5 0.4923796