Skip to contents

Preamble

testMetadata = function(model, comp, metadata){
  transformedSubjectLoadings = model$Fac[[1]][,comp]
  transformedSubjectLoadings = transformedSubjectLoadings / norm(transformedSubjectLoadings, "2")
  
  result = lm(transformedSubjectLoadings ~ BMI + C.section + Secretor + Lewis + whz.6m, data=metadata$mode1)
  
  # Extract coefficients and confidence intervals
  coef_estimates <- summary(result)$coefficients
  conf_intervals <- confint(result)
  
  # Remove intercept
  coef_estimates <- coef_estimates[rownames(coef_estimates) != "(Intercept)", ]
  conf_intervals <- conf_intervals[rownames(conf_intervals) != "(Intercept)", ]
  
  # Combine into a clean data frame
  summary_table <- data.frame(
    Term     = rownames(coef_estimates),
    Estimate = coef_estimates[, "Estimate"] * 1e3,
    CI       = paste0(
                 conf_intervals[, 1], " – ",
                 conf_intervals[, 2]
               ),
    P_value  = coef_estimates[, "Pr(>|t|)"],
    P_adjust = p.adjust(coef_estimates[, "Pr(>|t|)"], "BH"),
    row.names = NULL
  )
  
  return(summary_table)
}
# Old approach
# Needed colours: BMI groups (3) and phyla levels (6)
# colours = hue_pal()(9)
# #show_col(colours)
# BMI_cols = colours[c(1,4,7)]
# phylum_cols = colours[-c(1,4,7)]

# Rasmus's suggestion
colours = RColorBrewer:: brewer.pal(8, "Dark2")
BMI_cols = c("darkgreen","darkgoldenrod","darkred") #colours[1:3]
WHZ_cols = c("darkgreen", "darkgoldenrod", "dodgerblue4")
phylum_cols = colours

Maternal pre-pregnancy BMI

Processing

# Faeces
newFaeces = NPLStoolbox::Jakobsen2025$faeces

mask = newFaeces$mode1$subject %in% (NPLStoolbox::Jakobsen2025$faeces$mode1 %>% filter(!is.na(BMI)) %>% select(subject) %>% pull())
newFaeces$data = newFaeces$data[mask,,]
newFaeces$mode1 = newFaeces$mode1[mask,]
processedFaeces = parafac4microbiome::processDataCube(newFaeces, sparsityThreshold = 0.75, centerMode=1, scaleMode=2)

# Milk
newMilk = NPLStoolbox::Jakobsen2025$milkMicrobiome

mask = newMilk$mode1$subject %in% (NPLStoolbox::Jakobsen2025$milkMicrobiome$mode1 %>% filter(!is.na(BMI)) %>% select(subject) %>% pull())
newMilk$data = newMilk$data[mask,,]
newMilk$mode1 = newMilk$mode1[mask,]
processedMilk = parafac4microbiome::processDataCube(newMilk, sparsityThreshold=0.85, centerMode=1, scaleMode=2)

# Milk metab
newMilkMetab = NPLStoolbox::Jakobsen2025$milkMetabolomics

mask = newMilkMetab$mode1$subject %in% (NPLStoolbox::Jakobsen2025$milkMetabolomics$mode1 %>% filter(!is.na(BMI)) %>% select(subject) %>% pull())
newMilkMetab$data = newMilkMetab$data[mask,,]
newMilkMetab$mode1 = newMilkMetab$mode1[mask,]
processedMilkMetab = parafac4microbiome::processDataCube(newMilkMetab, CLR=FALSE, centerMode=1, scaleMode=2)
Y = processedFaeces$mode1$BMI
Ycnt = Y - mean(Y)
Ynorm_faecal = Ycnt / norm(Ycnt, "2")

Y = processedMilk$mode1$BMI
Ycnt = Y - mean(Y)
Ynorm_milk = Ycnt / norm(Ycnt, "2")

Y = processedMilkMetab$mode1$BMI
Ycnt = Y - mean(Y)
Ynorm_milkMetab = Ycnt / norm(Ycnt, "2")

Model selection

# CV_faecal = NPLStoolbox::ncrossreg(processedFaeces$data, Ynorm_faecal)
# CV_milk = NPLStoolbox::ncrossreg(processedMilk$data, Ynorm_milk)
# CV_milkMetab = NPLStoolbox::ncrossreg(processedMilkMetab$data, Ynorm_milkMetab)
# 
# saveRDS(CV_faecal, "./MAINHEALTH/NPLS_cv_faecal_BMI.RDS")
# saveRDS(CV_milk, "./MAINHEALTH/NPLS_cv_milk_BMI.RDS")
# saveRDS(CV_milkMetab, "./MAINHEALTH/NPLS_cv_milkMetab_BMI.RDS")
# 
# CV_faecal = readRDS("./NPLS_cv_faecal_BMI.RDS")
# CV_milk = readRDS("./NPLS_cv_milk_BMI.RDS")
# CV_milkMetab = readRDS("./NPLS_cv_milkMetab_BMI.RDS")
# 
# a=CV_faecal$varExp %>% pivot_longer(-numComponents) %>% ggplot(aes(x=as.factor(numComponents),y=value,col=as.factor(name),group=as.factor(name))) + geom_line() + geom_point() + xlab("Number of components") + ylab("Variance explained (%)") + theme(legend.position="none")
# b=CV_faecal$RMSE %>% ggplot(aes(x=numComponents,y=RMSE)) + geom_line() + geom_point() + xlab("Number of components") + ylab("RMSECV")
# ggarrange(a,b) # 1 comp
# 
# a=CV_milk$varExp %>% pivot_longer(-numComponents) %>% ggplot(aes(x=as.factor(numComponents),y=value,col=as.factor(name),group=as.factor(name))) + geom_line() + geom_point() + xlab("Number of components") + ylab("Variance explained (%)") + theme(legend.position="none")
# b=CV_milk$RMSE %>% ggplot(aes(x=numComponents,y=RMSE)) + geom_line() + geom_point() + xlab("Number of components") + ylab("RMSECV")
# ggarrange(a,b) # 1 or 2 comps
# 
# a=CV_milkMetab$varExp %>% pivot_longer(-numComponents) %>% ggplot(aes(x=as.factor(numComponents),y=value,col=as.factor(name),group=as.factor(name))) + geom_line() + geom_point() + xlab("Number of components") + ylab("Variance explained (%)") + theme(legend.position="none")
# b=CV_milkMetab$RMSE %>% ggplot(aes(x=numComponents,y=RMSE)) + geom_line() + geom_point() + xlab("Number of components") + ylab("RMSECV")
# ggarrange(a,b) # 1 comp
# NPLS_faecal = NPLStoolbox::triPLS1(processedFaeces$data, Ynorm_faecal, 1)
# NPLS_milk = NPLStoolbox::triPLS1(processedMilk$data, Ynorm_milk, 1)
# NPLS_milkMetab = NPLStoolbox::triPLS1(processedMilkMetab$data, Ynorm_milkMetab, 2)
# 
# saveRDS(NPLS_faecal, "./NPLS_faecal_BMI.RDS")
# saveRDS(NPLS_milk, "./NPLS_milk_BMI.RDS")
# saveRDS(NPLS_milkMetab, "./NPLS_milkMetab_BMI.RDS")

NPLS_faecal = readRDS("./MAINHEALTH/20250702_NPLS_faecal_bmi.RDS") # copied from paper (model is the same as above)
NPLS_milk = readRDS("./MAINHEALTH/20250702_NPLS_milk_bmi.RDS")
NPLS_milkMetab = readRDS("./MAINHEALTH/20250702_NPLS_milkMetab_bmi.RDS")

Infant faecal microbiome

testMetadata(NPLS_faecal, 1, processedFaeces)
#>        Term   Estimate                                       CI      P_value
#> 1       BMI   5.898149 0.0034449107549903 – 0.00835138722475675 5.296571e-06
#> 2 C.section  26.215456 -0.0186520767670303 – 0.0710829891553144 2.497335e-01
#> 3  Secretor  18.704297 -0.0122908996607012 – 0.0496994936374461 2.346150e-01
#> 4     Lewis -67.425763   -0.155253390661857 – 0.020401865528336 1.311911e-01
#> 5    whz.6m   1.488789 -0.0102479002724083 – 0.0132254776748785 8.021876e-01
#>       P_adjust
#> 1 2.648286e-05
#> 2 3.121669e-01
#> 3 3.121669e-01
#> 4 3.121669e-01
#> 5 8.021876e-01
# subject mode
a = processedFaeces$mode1 %>%
  mutate(comp1 = NPLS_faecal$Fac[[1]][,1]) %>%
  ggplot(aes(x=BMI,y=comp1)) +
  geom_point() +
  stat_cor() +
  xlab("Maternal ppBMI") +
  ylab("Loading") +
  theme(text=element_text(size=16))

# feature mode
topIndices = processedFaeces$mode2 %>% mutate(index=1:nrow(.), Comp = NPLS_faecal$Fac[[2]][,1]) %>% arrange(desc(Comp)) %>% head() %>% select(index) %>% pull()
bottomIndices = processedFaeces$mode2 %>% mutate(index=1:nrow(.), Comp = NPLS_faecal$Fac[[2]][,1]) %>% arrange(desc(Comp)) %>% tail() %>% select(index) %>% pull()

Xhat = parafac4microbiome::reinflateTensor(NPLS_faecal$Fac[[1]][,1], NPLS_faecal$Fac[[2]][,1], NPLS_faecal$Fac[[3]][,1])

print("Positive loadings:")
#> [1] "Positive loadings:"
print(cor(Xhat[,topIndices[1],1], processedFaeces$mode1$BMI))
#> [1] 0.3472706
print("Negative loadings:")
#> [1] "Negative loadings:"
print(cor(Xhat[,bottomIndices[1],1], processedFaeces$mode1$BMI))
#> [1] -0.3472706

df = processedFaeces$mode2 %>%
  mutate(Component_1 = NPLS_faecal$Fac[[2]][,1]) %>%
  arrange(Component_1) %>%
  mutate(index=1:nrow(.)) %>%
  mutate(Genus = gsub("g__", "", V7), Species = gsub("s__", "", V8)) %>% 
  mutate(Species = str_split_fixed(Species, "_", 2)[,2]) %>% 
  mutate(dplyr::across(.cols = Species, .fns = ~ dplyr::if_else(stringr::str_detect(.x, "sp."), "", .x))) %>%
  mutate(dplyr::across(.cols = Species, .fns = ~ dplyr::if_else(stringr::str_detect(.x, "organism"), "", .x))) %>%
  mutate(dplyr::across(.cols = Species, .fns = ~ dplyr::if_else(stringr::str_detect(.x, "bacterium"), "", .x))) %>%
  filter(index %in% c(1:10, 84:93))

df[df$Genus == "" & df$Species == "", "Species"] = "Unknown"
df[df$Species == "", "Species"] = "sp."
df$name = paste0(df$Genus, " ", df$Species)
  
b = df %>% ggplot(aes(x=Component_1,y=as.factor(index),fill=as.factor(V3))) +
  geom_bar(stat="identity", col="black") +
  xlab("Loading") +
  ylab("") +
  scale_y_discrete(labels=df$name) +
  scale_fill_manual(name="Phylum", values=hue_pal()(5)[-5], labels=c("Actinobacteriota", "Bacteroidota", "Firmicutes", "Proteobacteria")) +
  theme(text=element_text(size=16))

# time mode
c = NPLS_faecal$Fac[[3]][,1] %>%
  as_tibble() %>% 
  mutate(value=value) %>% 
  ggplot(aes(x=c(30,60,90),y=value)) +
  geom_line() +
  geom_point() +
  xlab("Time point [day]") +
  ylab("Loading") +
  ylim(0,1) +
  theme(text=element_text(size=16))

a

b

c

HM microbiome

testMetadata(NPLS_milk, 1, processedMilk)
#>        Term    Estimate                                       CI      P_value
#> 1       BMI   5.2632203 0.0028082806290875 – 0.00771815987488316 4.271728e-05
#> 2 C.section  22.5298730 -0.0223837277098485 – 0.0674434737293296 3.227116e-01
#> 3  Secretor  10.1431497  -0.0208977465387988 – 0.041184045914344 5.189801e-01
#> 4     Lewis -25.4005435  -0.113265523144618 – 0.0624644361527853 5.682343e-01
#> 5    whz.6m  -0.5937088  -0.012348966843658 – 0.0111615493028556 9.205334e-01
#>       P_adjust
#> 1 0.0002135864
#> 2 0.7102929209
#> 3 0.7102929209
#> 4 0.7102929209
#> 5 0.9205334443
# subject mode
a = processedMilk$mode1 %>%
  mutate(comp1 = NPLS_milk$Fac[[1]][,1]) %>%
  ggplot(aes(x=BMI,y=comp1)) +
  geom_point() +
  stat_cor() +
  xlab("Maternal ppBMI") +
  ylab("Loading") +
  theme(text=element_text(size=16))

# feature mode
topIndices = processedMilk$mode2 %>% mutate(index=1:nrow(.), Comp = NPLS_milk$Fac[[2]][,1]) %>% arrange(desc(Comp)) %>% head() %>% select(index) %>% pull()
bottomIndices = processedMilk$mode2 %>% mutate(index=1:nrow(.), Comp = NPLS_milk$Fac[[2]][,1]) %>% arrange(desc(Comp)) %>% tail() %>% select(index) %>% pull()

Xhat = parafac4microbiome::reinflateTensor(NPLS_milk$Fac[[1]][,1], NPLS_milk$Fac[[2]][,1], NPLS_milk$Fac[[3]][,1])

print("Positive loadings:")
#> [1] "Positive loadings:"
print(cor(Xhat[,topIndices[1],1], processedMilk$mode1$BMI)) # flip
#> [1] -0.3351345
print("Negative loadings:")
#> [1] "Negative loadings:"
print(cor(Xhat[,bottomIndices[1],1], processedMilk$mode1$BMI))
#> [1] 0.3351345

df = processedMilk$mode2 %>%
  mutate(Component_1 = -1*NPLS_milk$Fac[[2]][,1]) %>%
  arrange(Component_1) %>%
  mutate(index=1:nrow(.)) %>%
  mutate(Genus = gsub("g__", "", V7), Species = gsub("s__", "", V8)) %>% 
  mutate(Species = str_split_fixed(Species, "_", 2)[,2]) %>% 
  mutate(dplyr::across(.cols = Species, .fns = ~ dplyr::if_else(stringr::str_detect(.x, "sp."), "", .x))) %>%
  mutate(dplyr::across(.cols = Species, .fns = ~ dplyr::if_else(stringr::str_detect(.x, "organism"), "", .x))) %>%
  mutate(dplyr::across(.cols = Species, .fns = ~ dplyr::if_else(stringr::str_detect(.x, "bacterium"), "", .x))) %>%
  filter(index %in% c(1:10, 106:115))

df[df$Genus == "" & df$Species == "", "Species"] = "Unknown"
df[df$Species == "", "Species"] = "sp."
df$name = paste0(df$Genus, " ", df$Species)
  
b = df %>% ggplot(aes(x=Component_1,y=as.factor(index),fill=as.factor(V3))) +
  geom_bar(stat="identity", col="black") +
  xlab("Loading") +
  ylab("") +
  scale_y_discrete(labels=df$name) +
  scale_fill_manual(name="Phylum", values=hue_pal()(5)[-5], labels=c("Actinobacteriota", "Bacteroidota", "Firmicutes", "Proteobacteria")) +
  theme(text=element_text(size=16))

# time mode
c = NPLS_milk$Fac[[3]][,1] %>%
  as_tibble() %>% 
  mutate(value=-1*value) %>% 
  ggplot(aes(x=c(0,30,60,90),y=value)) +
  geom_line() +
  geom_point() +
  xlab("Time point [day]") +
  ylab("Loading") +
  ylim(0,1) +
  theme(text=element_text(size=16))

a

b

c

HM metabolome

testMetadata(NPLS_milkMetab, 1, processedMilkMetab)
#>        Term   Estimate                                         CI      P_value
#> 1       BMI   8.436042   0.00639137955287423 – 0.0104807043090265 2.963990e-13
#> 2 C.section  12.030701   -0.0252983772275604 – 0.0493597786809759 5.247398e-01
#> 3  Secretor  30.260127    0.00416557076519333 – 0.056354683431391 2.339660e-02
#> 4     Lewis -80.278456    -0.16922492265323 – 0.00866801160802368 7.648230e-02
#> 5    whz.6m -12.918737 -0.0226848901579012 – -0.00315258357667813 9.937333e-03
#>       P_adjust
#> 1 1.481995e-12
#> 2 5.247398e-01
#> 3 3.899434e-02
#> 4 9.560288e-02
#> 5 2.484333e-02
# subject mode
a = processedMilkMetab$mode1 %>%
  mutate(comp1 = NPLS_milkMetab$Fac[[1]][,1]) %>%
  ggplot(aes(x=BMI,y=comp1)) +
  geom_point() +
  stat_cor() +
  xlab("Maternal ppBMI") +
  ylab("Loading") +
  theme(text=element_text(size=16))

# feature mode
topIndices = processedMilkMetab$mode2 %>% mutate(index=1:nrow(.), Comp = NPLS_milkMetab$Fac[[2]][,1]) %>% arrange(desc(Comp)) %>% head() %>% select(index) %>% pull()
bottomIndices = processedMilkMetab$mode2 %>% mutate(index=1:nrow(.), Comp = NPLS_milkMetab$Fac[[2]][,1]) %>% arrange(desc(Comp)) %>% tail() %>% select(index) %>% pull()

Xhat = parafac4microbiome::reinflateTensor(NPLS_milkMetab$Fac[[1]][,1], NPLS_milkMetab$Fac[[2]][,1], NPLS_milkMetab$Fac[[3]][,1])

print("Positive loadings:")
#> [1] "Positive loadings:"
print(cor(Xhat[,topIndices[1],1], processedMilkMetab$mode1$BMI))
#> [1] 0.5572628
print("Negative loadings:")
#> [1] "Negative loadings:"
print(cor(Xhat[,bottomIndices[1],1], processedMilkMetab$mode1$BMI))
#> [1] -0.5572628

temp = processedMilkMetab$mode2 %>% mutate(Component_1 = NPLS_milkMetab$Fac[[2]][,1]) %>% arrange(Component_1) %>% mutate(index=1:nrow(.)) %>% filter(index %in% c(1:10, 61:70))

b=temp %>%
  ggplot(aes(x=Component_1,y=as.factor(index),fill=as.factor(Class),pattern=as.factor(Class))) +
  geom_bar_pattern(stat="identity",
                   colour="black",
                   pattern="stripe",
                   pattern_fill="black",
                   pattern_density=0.2,
                   pattern_spacing=0.05,
                   pattern_angle=45,
                   pattern_size=0.2) +
  scale_y_discrete(label=temp$Metabolite) +
  xlab("Loading") +
  ylab("") +
  scale_fill_manual(name="Class", values=hue_pal()(7)[-5], labels=c("Amino acids and derivatives", "Energy related", "Fatty acids and derivatives", "Food derived", "Oligosaccharides", "Sugars")) +
  theme(text=element_text(size=16), legend.position="none")

# time mode
c = NPLS_milkMetab$Fac[[3]][,1] %>%
  as_tibble() %>% 
  mutate(value=value) %>% 
  ggplot(aes(x=c(0,30,60,90),y=value)) +
  geom_line() +
  geom_point() +
  xlab("Time point [day]") +
  ylab("Loading") +
  ylim(0,1) +
  theme(text=element_text(size=16))

a

b

c

Infant weight-over-height z-score at six months

Processing

# Faeces
newFaeces = NPLStoolbox::Jakobsen2025$faeces

mask1 = newFaeces$mode1$subject %in% (NPLStoolbox::Jakobsen2025$faeces$mode1 %>% filter(!is.na(whz.6m)) %>% select(subject) %>% pull())
mask2 = newFaeces$mode1$subject != "322"
mask = mask1 & mask2
newFaeces$data = newFaeces$data[mask,,]
newFaeces$mode1 = newFaeces$mode1[mask,]
processedFaeces = parafac4microbiome::processDataCube(newFaeces, sparsityThreshold = 0.75, centerMode=1, scaleMode=2) # was 0.75

# Milk
newMilk = NPLStoolbox::Jakobsen2025$milkMicrobiome

mask = newMilk$mode1$subject %in% (NPLStoolbox::Jakobsen2025$milkMicrobiome$mode1 %>% filter(!is.na(whz.6m)) %>% select(subject) %>% pull())
newMilk$data = newMilk$data[mask,,]
newMilk$mode1 = newMilk$mode1[mask,]
processedMilk = parafac4microbiome::processDataCube(newMilk, sparsityThreshold=0.85, centerMode=1, scaleMode=2) # was 0.85

# Milk metab
newMilkMetab = NPLStoolbox::Jakobsen2025$milkMetabolomics

mask = newMilkMetab$mode1$subject %in% (NPLStoolbox::Jakobsen2025$milkMetabolomics$mode1 %>% filter(!is.na(whz.6m)) %>% select(subject) %>% pull())
newMilkMetab$data = newMilkMetab$data[mask,,-1]
newMilkMetab$mode1 = newMilkMetab$mode1[mask,]
newMilkMetab$mode3 = newMilkMetab$mode3[-1,]
processedMilkMetab = parafac4microbiome::processDataCube(newMilkMetab, CLR=FALSE, centerMode=1, scaleMode=2)
Y = processedFaeces$mode1$whz.6m
Ycnt = Y - mean(Y)
Ynorm_faecal = Ycnt / norm(Ycnt, "2")

Y = processedMilk$mode1$whz.6m
Ycnt = Y - mean(Y)
Ynorm_milk = Ycnt / norm(Ycnt, "2")

Y = processedMilkMetab$mode1$whz.6m
Ycnt = Y - mean(Y)
Ynorm_milkMetab = Ycnt / norm(Ycnt, "2")

Model selection

# CV_faecal = NPLStoolbox::ncrossreg(processedFaeces$data, Ynorm_faecal)
# CV_milk = NPLStoolbox::ncrossreg(processedMilk$data, Ynorm_milk)
# CV_milkMetab = NPLStoolbox::ncrossreg(processedMilkMetab$data, Ynorm_milkMetab)
# 
# saveRDS(CV_faecal, "./MAINHEALTH/NPLS_cv_faecal_BMI.RDS")
# saveRDS(CV_milk, "./MAINHEALTH/NPLS_cv_milk_BMI.RDS")
# saveRDS(CV_milkMetab, "./MAINHEALTH/NPLS_cv_milkMetab_BMI.RDS")
# 
# CV_faecal = readRDS("./NPLS_cv_faecal_BMI.RDS")
# CV_milk = readRDS("./NPLS_cv_milk_BMI.RDS")
# CV_milkMetab = readRDS("./NPLS_cv_milkMetab_BMI.RDS")
# 
# a=CV_faecal$varExp %>% pivot_longer(-numComponents) %>% ggplot(aes(x=as.factor(numComponents),y=value,col=as.factor(name),group=as.factor(name))) + geom_line() + geom_point() + xlab("Number of components") + ylab("Variance explained (%)") + theme(legend.position="none")
# b=CV_faecal$RMSE %>% ggplot(aes(x=numComponents,y=RMSE)) + geom_line() + geom_point() + xlab("Number of components") + ylab("RMSECV")
# ggarrange(a,b) # 1 comp
# 
# a=CV_milk$varExp %>% pivot_longer(-numComponents) %>% ggplot(aes(x=as.factor(numComponents),y=value,col=as.factor(name),group=as.factor(name))) + geom_line() + geom_point() + xlab("Number of components") + ylab("Variance explained (%)") + theme(legend.position="none")
# b=CV_milk$RMSE %>% ggplot(aes(x=numComponents,y=RMSE)) + geom_line() + geom_point() + xlab("Number of components") + ylab("RMSECV")
# ggarrange(a,b) # 1 or 2 comps
# 
# a=CV_milkMetab$varExp %>% pivot_longer(-numComponents) %>% ggplot(aes(x=as.factor(numComponents),y=value,col=as.factor(name),group=as.factor(name))) + geom_line() + geom_point() + xlab("Number of components") + ylab("Variance explained (%)") + theme(legend.position="none")
# b=CV_milkMetab$RMSE %>% ggplot(aes(x=numComponents,y=RMSE)) + geom_line() + geom_point() + xlab("Number of components") + ylab("RMSECV")
# ggarrange(a,b) # 1 comp
# NPLS_faecal = NPLStoolbox::triPLS1(processedFaeces$data, Ynorm_faecal, 1)
# NPLS_milk = NPLStoolbox::triPLS1(processedMilk$data, Ynorm_milk, 1)
# NPLS_milkMetab = NPLStoolbox::triPLS1(processedMilkMetab$data, Ynorm_milkMetab, 2)
# 
# saveRDS(NPLS_faecal, "./NPLS_faecal_BMI.RDS")
# saveRDS(NPLS_milk, "./NPLS_milk_BMI.RDS")
# saveRDS(NPLS_milkMetab, "./NPLS_milkMetab_BMI.RDS")

NPLS_faecal = readRDS("./MAINHEALTH/20250702_NPLS_faecal_whz.RDS")
NPLS_milk = readRDS("./MAINHEALTH/20250702_NPLS_milk_whz.RDS")
NPLS_milkMetab = readRDS("./MAINHEALTH/20250702_NPLS_milkMetab_whz.RDS")

Infant faecal microbiome

testMetadata(NPLS_faecal, 1, processedFaeces)
#>        Term   Estimate                                          CI      P_value
#> 1       BMI   2.110906 -0.000563660471044009 – 0.00478547212944011 0.1208012322
#> 2 C.section  48.317474  -0.000583193126835087 – 0.0972181419517821 0.0527523625
#> 3  Secretor  17.889671    -0.0159154384713816 – 0.0516947797774786 0.2969361598
#> 4     Lewis -83.311435     -0.178990514960686 – 0.0123676446394298 0.0873024247
#> 5    whz.6m  23.212088     0.0104288246136979 – 0.0359953513693422 0.0004681401
#>    P_adjust
#> 1 0.1510015
#> 2 0.1318809
#> 3 0.2969362
#> 4 0.1455040
#> 5 0.0023407
# subject mode
a = processedFaeces$mode1 %>%
  mutate(comp1 = NPLS_faecal$Fac[[1]][,1]) %>%
  ggplot(aes(x=whz.6m,y=comp1)) +
  geom_point() +
  stat_cor() +
  xlab("Infant 6-month WHZ") +
  ylab("Loading") +
  theme(text=element_text(size=16))

# feature mode
topIndices = processedFaeces$mode2 %>% mutate(index=1:nrow(.), Comp = NPLS_faecal$Fac[[2]][,1]) %>% arrange(desc(Comp)) %>% head() %>% select(index) %>% pull()
bottomIndices = processedFaeces$mode2 %>% mutate(index=1:nrow(.), Comp = NPLS_faecal$Fac[[2]][,1]) %>% arrange(desc(Comp)) %>% tail() %>% select(index) %>% pull()

Xhat = parafac4microbiome::reinflateTensor(NPLS_faecal$Fac[[1]][,1], NPLS_faecal$Fac[[2]][,1], NPLS_faecal$Fac[[3]][,1])

print("Positive loadings:")
#> [1] "Positive loadings:"
print(cor(Xhat[,topIndices[1],1], processedFaeces$mode1$whz.6m))
#> [1] -0.3351143
print("Negative loadings:")
#> [1] "Negative loadings:"
print(cor(Xhat[,bottomIndices[1],1], processedFaeces$mode1$whz.6m))
#> [1] 0.3351143

df = processedFaeces$mode2 %>%
  mutate(Component_1 = -1*NPLS_faecal$Fac[[2]][,1]) %>%
  arrange(Component_1) %>%
  mutate(index=1:nrow(.)) %>%
  mutate(Genus = gsub("g__", "", V7), Species = gsub("s__", "", V8)) %>% 
  mutate(Species = str_split_fixed(Species, "_", 2)[,2]) %>% 
  mutate(dplyr::across(.cols = Species, .fns = ~ dplyr::if_else(stringr::str_detect(.x, "sp."), "", .x))) %>%
  mutate(dplyr::across(.cols = Species, .fns = ~ dplyr::if_else(stringr::str_detect(.x, "organism"), "", .x))) %>%
  mutate(dplyr::across(.cols = Species, .fns = ~ dplyr::if_else(stringr::str_detect(.x, "bacterium"), "", .x))) %>%
  filter(index %in% c(1:10, 84:93))

df[df$Genus == "" & df$Species == "", "Species"] = "Unknown"
df[df$Species == "", "Species"] = "sp."
df$name = paste0(df$Genus, " ", df$Species)
  
b = df %>% ggplot(aes(x=Component_1,y=as.factor(index),fill=as.factor(V3))) +
  geom_bar(stat="identity", col="black") +
  xlab("Loading") +
  ylab("") +
  scale_y_discrete(labels=df$name) +
  scale_fill_manual(name="Phylum", values=hue_pal()(5)[-5], labels=c("Actinobacteriota", "Bacteroidota", "Firmicutes", "Proteobacteria")) +
  theme(text=element_text(size=16))

# time mode
c = NPLS_faecal$Fac[[3]][,1] %>%
  as_tibble() %>% 
  mutate(value=-1*value) %>% 
  ggplot(aes(x=c(30,60,90),y=value)) +
  geom_line() +
  geom_point() +
  xlab("Time point [day]") +
  ylab("Loading") +
  ylim(0,1) +
  theme(text=element_text(size=16))

a

b

c

HM microbiome

testMetadata(NPLS_milk, 1, processedMilk)
#>        Term  Estimate                                         CI      P_value
#> 1       BMI  3.619784 0.000919112513437887 – 0.00632045481617513 0.0090248586
#> 2 C.section 28.892024   -0.0205172827975998 – 0.0783013314250206 0.2493411802
#> 3  Secretor  7.069697   -0.0270782924260968 – 0.0412176869697414 0.6826797911
#> 4     Lewis -6.834201    -0.103494181064122 – 0.0898257796709901 0.8889328165
#> 5    whz.6m 23.511923    0.0105800019938657 – 0.0364438448837068 0.0004607509
#>      P_adjust
#> 1 0.022562147
#> 2 0.415568634
#> 3 0.853349739
#> 4 0.888932817
#> 5 0.002303755
# subject mode
a = processedMilk$mode1 %>%
  mutate(comp1 = NPLS_milk$Fac[[1]][,1]) %>%
  ggplot(aes(x=whz.6m,y=comp1)) +
  geom_point() +
  stat_cor() +
  xlab("Maternal ppBMI") +
  ylab("Loading") +
  theme(text=element_text(size=16))

# feature mode
topIndices = processedMilk$mode2 %>% mutate(index=1:nrow(.), Comp = NPLS_milk$Fac[[2]][,1]) %>% arrange(desc(Comp)) %>% head() %>% select(index) %>% pull()
bottomIndices = processedMilk$mode2 %>% mutate(index=1:nrow(.), Comp = NPLS_milk$Fac[[2]][,1]) %>% arrange(desc(Comp)) %>% tail() %>% select(index) %>% pull()

Xhat = parafac4microbiome::reinflateTensor(NPLS_milk$Fac[[1]][,1], NPLS_milk$Fac[[2]][,1], NPLS_milk$Fac[[3]][,1])

print("Positive loadings:")
#> [1] "Positive loadings:"
print(cor(Xhat[,topIndices[1],1], processedMilk$mode1$whz.6m)) # flip
#> [1] 0.312612
print("Negative loadings:")
#> [1] "Negative loadings:"
print(cor(Xhat[,bottomIndices[1],1], processedMilk$mode1$whz.6m))
#> [1] -0.312612

df = processedMilk$mode2 %>%
  mutate(Component_1 = -1*NPLS_milk$Fac[[2]][,1]) %>%
  arrange(Component_1) %>%
  mutate(index=1:nrow(.)) %>%
  mutate(Genus = gsub("g__", "", V7), Species = gsub("s__", "", V8)) %>% 
  mutate(Species = str_split_fixed(Species, "_", 2)[,2]) %>% 
  mutate(dplyr::across(.cols = Species, .fns = ~ dplyr::if_else(stringr::str_detect(.x, "sp."), "", .x))) %>%
  mutate(dplyr::across(.cols = Species, .fns = ~ dplyr::if_else(stringr::str_detect(.x, "organism"), "", .x))) %>%
  mutate(dplyr::across(.cols = Species, .fns = ~ dplyr::if_else(stringr::str_detect(.x, "bacterium"), "", .x))) %>%
  filter(index %in% c(1:10, 106:115))

df[df$Genus == "" & df$Species == "", "Species"] = "Unknown"
df[df$Species == "", "Species"] = "sp."
df$name = paste0(df$Genus, " ", df$Species)
  
b = df %>% ggplot(aes(x=Component_1,y=as.factor(index),fill=as.factor(V3))) +
  geom_bar(stat="identity", col="black") +
  xlab("Loading") +
  ylab("") +
  scale_y_discrete(labels=df$name) +
  theme(text=element_text(size=16))

# time mode
c = NPLS_milk$Fac[[3]][,1] %>%
  as_tibble() %>% 
  mutate(value=-1*value) %>% 
  ggplot(aes(x=c(0,30,60,90),y=value)) +
  geom_line() +
  geom_point() +
  xlab("Time point [day]") +
  ylab("Loading") +
  ylim(0,1) +
  theme(text=element_text(size=16))

a

b

c
#> Warning: Removed 4 rows containing missing values or values outside the scale range
#> (`geom_line()`).
#> Warning: Removed 4 rows containing missing values or values outside the scale range
#> (`geom_point()`).

HM metabolome

testMetadata(NPLS_milkMetab, 1, processedMilkMetab)
#>        Term   Estimate                                        CI      P_value
#> 1       BMI  -4.201449 -0.0065484366313087 – -0.0018544615122335 5.571588e-04
#> 2 C.section -13.368907  -0.0562174881702821 – 0.0294796738718076 5.380324e-01
#> 3  Secretor -80.629893  -0.110582808524329 – -0.0506769778677665 4.475097e-07
#> 4     Lewis  39.067983    -0.0630301697222898 – 0.14116613579865 4.502868e-01
#> 5    whz.6m  30.446281   0.0192360974045573 – 0.0416564642322284 3.612822e-07
#>       P_adjust
#> 1 9.285980e-04
#> 2 5.380324e-01
#> 3 1.118774e-06
#> 4 5.380324e-01
#> 5 1.118774e-06
# subject mode
a = processedMilkMetab$mode1 %>%
  mutate(comp1 = NPLS_milkMetab$Fac[[1]][,1]) %>%
  ggplot(aes(x=whz.6m,y=comp1)) +
  geom_point() +
  stat_cor() +
  xlab("Maternal ppBMI") +
  ylab("Loading") +
  theme(text=element_text(size=16))

# feature mode
topIndices = processedMilkMetab$mode2 %>% mutate(index=1:nrow(.), Comp = NPLS_milkMetab$Fac[[2]][,1]) %>% arrange(desc(Comp)) %>% head() %>% select(index) %>% pull()
bottomIndices = processedMilkMetab$mode2 %>% mutate(index=1:nrow(.), Comp = NPLS_milkMetab$Fac[[2]][,1]) %>% arrange(desc(Comp)) %>% tail() %>% select(index) %>% pull()

Xhat = parafac4microbiome::reinflateTensor(NPLS_milkMetab$Fac[[1]][,1], NPLS_milkMetab$Fac[[2]][,1], NPLS_milkMetab$Fac[[3]][,1])

print("Positive loadings:")
#> [1] "Positive loadings:"
print(cor(Xhat[,topIndices[1],1], processedMilkMetab$mode1$whz.6m))
#> [1] -0.3974434
print("Negative loadings:")
#> [1] "Negative loadings:"
print(cor(Xhat[,bottomIndices[1],1], processedMilkMetab$mode1$whz.6m))
#> [1] 0.3974434

temp = processedMilkMetab$mode2 %>% mutate(Component_1 = NPLS_milkMetab$Fac[[2]][,1]) %>% arrange(Component_1) %>% mutate(index=1:nrow(.)) %>% filter(index %in% c(1:10, 61:70))

b=temp %>%
  ggplot(aes(x=Component_1,y=as.factor(index),fill=as.factor(Class),pattern=as.factor(Class))) +
  geom_bar_pattern(stat="identity",
                   colour="black",
                   pattern="stripe",
                   pattern_fill="black",
                   pattern_density=0.2,
                   pattern_spacing=0.05,
                   pattern_angle=45,
                   pattern_size=0.2) +
  scale_y_discrete(label=temp$Metabolite) +
  xlab("Loading") +
  ylab("") +
  theme(text=element_text(size=16), legend.position="none")

# time mode
c = NPLS_milkMetab$Fac[[3]][,1] %>%
  as_tibble() %>% 
  mutate(value=-1*value, timepoints = c(30,60,90)) %>% 
  ggplot(aes(x=timepoints,y=value)) +
  geom_line() +
  geom_point() +
  xlab("Time point [day]") +
  ylab("Loading") +
  ylim(0,1) +
  theme(text=element_text(size=16))

a

b

c