Processing
# Faeces
newFaeces = NPLStoolbox::Jakobsen2025$faeces
mask = newFaeces$mode1$subject %in% (NPLStoolbox::Jakobsen2025$faeces$mode1 %>% filter(!is.na(BMI)) %>% select(subject) %>% pull())
newFaeces$data = newFaeces$data[mask,,]
newFaeces$mode1 = newFaeces$mode1[mask,]
processedFaeces = parafac4microbiome::processDataCube(newFaeces, sparsityThreshold = 0.75, centerMode=1, scaleMode=2)
# Milk
newMilk = NPLStoolbox::Jakobsen2025$milkMicrobiome
mask = newMilk$mode1$subject %in% (NPLStoolbox::Jakobsen2025$milkMicrobiome$mode1 %>% filter(!is.na(BMI)) %>% select(subject) %>% pull())
newMilk$data = newMilk$data[mask,,]
newMilk$mode1 = newMilk$mode1[mask,]
processedMilk = parafac4microbiome::processDataCube(newMilk, sparsityThreshold=0.85, centerMode=1, scaleMode=2)
# Milk metab
newMilkMetab = NPLStoolbox::Jakobsen2025$milkMetabolomics
mask = newMilkMetab$mode1$subject %in% (NPLStoolbox::Jakobsen2025$milkMetabolomics$mode1 %>% filter(!is.na(BMI)) %>% select(subject) %>% pull())
newMilkMetab$data = newMilkMetab$data[mask,,]
newMilkMetab$mode1 = newMilkMetab$mode1[mask,]
processedMilkMetab = parafac4microbiome::processDataCube(newMilkMetab, CLR=FALSE, centerMode=1, scaleMode=2)
Y = processedFaeces$mode1$BMI
Ycnt = Y - mean(Y)
Ynorm_faecal = Ycnt / norm(Ycnt, "2")
Y = processedMilk$mode1$BMI
Ycnt = Y - mean(Y)
Ynorm_milk = Ycnt / norm(Ycnt, "2")
Y = processedMilkMetab$mode1$BMI
Ycnt = Y - mean(Y)
Ynorm_milkMetab = Ycnt / norm(Ycnt, "2")
CV_faecal = NPLStoolbox::ncrossreg(processedFaeces$data, Ynorm_faecal, maxNumComponents = 5, cvFolds=10)
CV_milk = NPLStoolbox::ncrossreg(processedMilk$data, Ynorm_milk, maxNumComponents = 5, cvFolds=10)
CV_milkMetab = NPLStoolbox::ncrossreg(processedMilkMetab$data, Ynorm_milkMetab, maxNumComponents = 5, cvFolds=10)
a=CV_faecal$varExp %>% pivot_longer(-numComponents) %>% ggplot(aes(x=as.factor(numComponents),y=value,col=as.factor(name),group=as.factor(name))) + geom_line() + geom_point() + xlab("Number of components") + ylab("Variance explained (%)") + theme(legend.position="none")
b=CV_faecal$RMSE %>% ggplot(aes(x=numComponents,y=RMSE)) + geom_line() + geom_point() + xlab("Number of components") + ylab("RMSECV")
ggarrange(a,b) # 1 comp



NPLS_faecal = NPLStoolbox::triPLS1(processedFaeces$data, Ynorm_faecal, 1)
NPLS_milk = NPLStoolbox::triPLS1(processedMilk$data, Ynorm_milk, 1)
NPLS_milkMetab = NPLStoolbox::triPLS1(processedMilkMetab$data, Ynorm_milkMetab, 2)
testMetadata = function(model, comp, metadata){
transformedSubjectLoadings = transformPARAFACloadings(model$Fac, 1)[,comp]
transformedSubjectLoadings = transformedSubjectLoadings / norm(transformedSubjectLoadings, "2")
result = lm(transformedSubjectLoadings ~ BMI + C.section + Secretor + Lewis + whz.6m, data=metadata$mode1)
# Extract coefficients and confidence intervals
coef_estimates <- summary(result)$coefficients
conf_intervals <- confint(result)
# Remove intercept
coef_estimates <- coef_estimates[rownames(coef_estimates) != "(Intercept)", ]
conf_intervals <- conf_intervals[rownames(conf_intervals) != "(Intercept)", ]
# Combine into a clean data frame
summary_table <- data.frame(
Term = rownames(coef_estimates),
Estimate = coef_estimates[, "Estimate"] * 1e3,
CI = paste0(
conf_intervals[, 1], " – ",
conf_intervals[, 2]
),
P_value = coef_estimates[, "Pr(>|t|)"],
P_adjust = p.adjust(coef_estimates[, "Pr(>|t|)"], "BH"),
row.names = NULL
)
return(summary_table)
}
testMetadata(NPLS_faecal, 1, processedFaeces)
#> Term Estimate CI P_value
#> 1 BMI 5.898149 0.00344491075499029 – 0.00835138722475675 5.296571e-06
#> 2 C.section 26.215456 -0.0186520767670303 – 0.0710829891553144 2.497335e-01
#> 3 Secretor 18.704297 -0.0122908996607013 – 0.0496994936374461 2.346150e-01
#> 4 Lewis -67.425763 -0.155253390661857 – 0.020401865528336 1.311911e-01
#> 5 whz.6m 1.488789 -0.0102479002724083 – 0.0132254776748785 8.021876e-01
#> P_adjust
#> 1 2.648286e-05
#> 2 3.121669e-01
#> 3 3.121669e-01
#> 4 3.121669e-01
#> 5 8.021876e-01
testMetadata(NPLS_milk, 1, processedMilk)
#> Term Estimate CI P_value
#> 1 BMI 5.2632203 0.0028082806290875 – 0.00771815987488316 4.271728e-05
#> 2 C.section 22.5298730 -0.0223837277098485 – 0.0674434737293296 3.227116e-01
#> 3 Secretor 10.1431497 -0.0208977465387988 – 0.041184045914344 5.189801e-01
#> 4 Lewis -25.4005435 -0.113265523144618 – 0.0624644361527853 5.682343e-01
#> 5 whz.6m -0.5937088 -0.012348966843658 – 0.0111615493028556 9.205334e-01
#> P_adjust
#> 1 0.0002135864
#> 2 0.7102929209
#> 3 0.7102929209
#> 4 0.7102929209
#> 5 0.9205334443
testMetadata(NPLS_milkMetab, 1, processedMilkMetab)
#> Term Estimate CI P_value
#> 1 BMI 8.670681 0.00664584079115655 – 0.0106955221180947 5.530018e-14
#> 2 C.section 11.074774 -0.0258924224871216 – 0.0480419695984113 5.543102e-01
#> 3 Secretor 19.681300 -0.00616028587042244 – 0.0455228862023762 1.342499e-01
#> 4 Lewis -78.258205 -0.166342392359317 – 0.00982598189078401 8.113566e-02
#> 5 whz.6m -12.584073 -0.0222555492384812 – -0.00291259607365677 1.118449e-02
#> P_adjust
#> 1 2.765009e-13
#> 2 5.543102e-01
#> 3 1.678123e-01
#> 4 1.352261e-01
#> 5 2.796123e-02
testMetadata(NPLS_milkMetab, 2, processedMilkMetab)
#> Term Estimate CI P_value
#> 1 BMI -0.1981968 -0.002175283642196 – 0.00177888998370945 8.430536e-01
#> 2 C.section -12.7552845 -0.048850646024158 – 0.0233400770550342 4.856154e-01
#> 3 Secretor -111.0853182 -0.136317456123891 – -0.085853180193343 1.501529e-14
#> 4 Lewis 42.9906785 -0.0430161307912073 – 0.128997487797659 3.244425e-01
#> 5 whz.6m 7.0106029 -0.00243278155702489 – 0.016453987320985 1.442727e-01
#> P_adjust
#> 1 8.430536e-01
#> 2 6.070193e-01
#> 3 7.507646e-14
#> 4 5.407375e-01
#> 5 3.606817e-01
colourCols = c("BMI.group", "V3", "")
legendTitles = c("BMI group", "Phylum", "")
xLabels = c("Subject index", "Feature index", "Time index")
legendColNums = c(3,5,0)
arrangeModes = c(TRUE, TRUE, FALSE)
continuousModes = c(FALSE,FALSE,TRUE)
parafac4microbiome::plotPARAFACmodel(NPLS_faecal$Fac, processedFaeces, 1, colourCols, legendTitles, xLabels, legendColNums, arrangeModes, continuousModes,
overallTitle = "Infant faecal microbiome")

parafac4microbiome::plotPARAFACmodel(NPLS_milk$Fac, processedMilk, 1, colourCols, legendTitles, xLabels, legendColNums, arrangeModes, continuousModes,
overallTitle = "HM microbiome")

parafac4microbiome::plotPARAFACmodel(NPLS_milkMetab$Fac, processedMilkMetab, 2, colourCols, legendTitles, xLabels, legendColNums, arrangeModes, continuousModes, overallTitle = "HM metabolome")
