Model selection
# Please see the separate R scripts at https://doi.org/10.5281/zenodo.16993009 for the model selection.
model_ACMTF = readRDS("./MAINHEALTH/ACMTF_model_bmi.RDS")
testMetadata(model_ACMTF, 1, processedFaeces)
#> Term Estimate CI
#> 1 BMI -2.276732 -0.00450781299767528 – -4.56503944847972e-05
#> 2 C.section 4.715650 -0.0361024191316937 – 0.045533718733486
#> 3 Secretor -84.617925 -0.112828298007938 – -0.0564075514364901
#> 4 Lewis 55.837894 -0.024014950520992 – 0.13569073945265
#> 5 whz.6m 3.157397 -0.0075259356824812 – 0.0138407294998676
#> P_value P_adjust
#> 1 4.556168e-02 1.139042e-01
#> 2 8.195074e-01 8.195074e-01
#> 3 2.717526e-08 1.358763e-07
#> 4 1.688349e-01 2.813915e-01
#> 5 5.596344e-01 6.995429e-01
testMetadata(model_ACMTF, 2, processedFaeces)
#> Term Estimate CI P_value
#> 1 BMI -4.4150025 -0.00694585174854792 – -0.0018841532867865 0.0007595475
#> 2 C.section -40.0909098 -0.0863932916037776 – 0.00621147209552562 0.0890716786
#> 3 Secretor 41.3042948 0.0093035776930651 – 0.0733050118693837 0.0118369347
#> 4 Lewis 72.2848185 -0.0182970492398522 – 0.162866686186581 0.1167744722
#> 5 whz.6m 0.9816376 -0.0111371068252218 – 0.0131003820123962 0.8728860770
#> P_adjust
#> 1 0.003797738
#> 2 0.145968090
#> 3 0.029592337
#> 4 0.145968090
#> 5 0.872886077
lambda = abs(model_ACMTF$Fac[[8]])
colnames(lambda) = paste0("C", 1:2)
lambda %>%
as_tibble() %>%
mutate(block=c("Infant faecal microbiome", "HM microbiome", "HM metabolome")) %>%
# mutate(block=factor(block, levels=c("tongue", "lowling", "lowinter", "upling", "upinter", "saliva", "metab"))) %>%
pivot_longer(-block) %>%
ggplot(aes(x=as.factor(name),y=value,fill=as.factor(block))) +
geom_bar(stat="identity",position=position_dodge(),col="black") +
xlab("ACMTF component number") +
ylab(expression(lambda)) +
scale_x_discrete(labels=1:3) +
scale_fill_manual(name="Dataset",values = hue_pal()(3),labels=c("HM metabolome", "HM microbiome", "Infant faecal microbiome")) +
theme(legend.position="top", text=element_text(size=16))

# Faeces
topIndices = processedFaeces$mode2 %>% mutate(index=1:nrow(.), Comp = model_ACMTF$Fac[[2]][,1]) %>% arrange(desc(Comp)) %>% head() %>% select(index) %>% pull()
bottomIndices = processedFaeces$mode2 %>% mutate(index=1:nrow(.), Comp = model_ACMTF$Fac[[2]][,1]) %>% arrange(desc(Comp)) %>% tail() %>% select(index) %>% pull()
Xhat = parafac4microbiome::reinflateTensor(model_ACMTF$Fac[[1]][,1], model_ACMTF$Fac[[2]][,1], model_ACMTF$Fac[[3]][,1])
print("Positive loadings:")
#> [1] "Positive loadings:"
print(cor(Xhat[,topIndices[1],1], processedMilkMetab$mode1$Secretor)) # no flip
#> [1] 0.4992547
print("Negative loadings:")
#> [1] "Negative loadings:"
print(cor(Xhat[,bottomIndices[1],1], processedMilkMetab$mode1$Secretor))
#> [1] -0.4992547
print("Positive loadings:")
#> [1] "Positive loadings:"
print(cor(Xhat[,topIndices[1],1], processedMilkMetab$mode1$BMI)) # no flip
#> [1] 0.07586748
print("Negative loadings:")
#> [1] "Negative loadings:"
print(cor(Xhat[,bottomIndices[1],1], processedMilkMetab$mode1$BMI))
#> [1] -0.07586748
# Milk
topIndices = processedMilk$mode2 %>% mutate(index=1:nrow(.), Comp = model_ACMTF$Fac[[4]][,1]) %>% arrange(desc(Comp)) %>% head() %>% select(index) %>% pull()
bottomIndices = processedMilk$mode2 %>% mutate(index=1:nrow(.), Comp = model_ACMTF$Fac[[4]][,1]) %>% arrange(desc(Comp)) %>% tail() %>% select(index) %>% pull()
Xhat = 1 * parafac4microbiome::reinflateTensor(model_ACMTF$Fac[[1]][,1], model_ACMTF$Fac[[4]][,1], model_ACMTF$Fac[[5]][,1])
print("Positive loadings:")
#> [1] "Positive loadings:"
print(cor(Xhat[,topIndices[1],1], processedMilk$mode1$Secretor)) # no flip
#> [1] 0.5156076
print("Negative loadings:")
#> [1] "Negative loadings:"
print(cor(Xhat[,bottomIndices[1],1], processedMilk$mode1$Secretor))
#> [1] -0.5156076
print("Positive loadings:")
#> [1] "Positive loadings:"
print(cor(Xhat[,topIndices[1],1], processedMilk$mode1$BMI)) # no flip
#> [1] 0.07586744
print("Negative loadings:")
#> [1] "Negative loadings:"
print(cor(Xhat[,bottomIndices[1],1], processedMilk$mode1$BMI))
#> [1] -0.07586744
# Milk metab
topIndices = processedMilkMetab$mode2 %>% mutate(index=1:nrow(.), Comp = model_ACMTF$Fac[[6]][,1]) %>% arrange(desc(Comp)) %>% head() %>% select(index) %>% pull()
bottomIndices = processedMilkMetab$mode2 %>% mutate(index=1:nrow(.), Comp = model_ACMTF$Fac[[6]][,1]) %>% arrange(desc(Comp)) %>% tail() %>% select(index) %>% pull()
Xhat = parafac4microbiome::reinflateTensor(model_ACMTF$Fac[[1]][,1], model_ACMTF$Fac[[6]][,1], model_ACMTF$Fac[[7]][,1])
print("Positive loadings:")
#> [1] "Positive loadings:"
print(cor(Xhat[,topIndices[1],1], processedMilkMetab$mode1$Secretor)) # no flip
#> [1] 0.4992547
print("Negative loadings:")
#> [1] "Negative loadings:"
print(cor(Xhat[,bottomIndices[1],1], processedMilkMetab$mode1$Secretor))
#> [1] -0.4992547
print("Positive loadings:")
#> [1] "Positive loadings:"
print(cor(Xhat[,topIndices[1],1], processedMilkMetab$mode1$BMI)) # no flip
#> [1] 0.07586748
print("Negative loadings:")
#> [1] "Negative loadings:"
print(cor(Xhat[,bottomIndices[1],1], processedMilkMetab$mode1$BMI))
#> [1] -0.07586748
a = processedMilk$mode1 %>%
mutate(Component_1 = model_ACMTF$Fac[[1]][,1]) %>%
ggplot(aes(x=as.factor(Secretor),y=Component_1)) +
geom_boxplot() +
stat_compare_means(comparisons=list(c("0","1")), label="p.signif") +
xlab("Secretor status") +
ylab("Loading") +
scale_x_discrete(labels=c("Se-", "Se+")) +
theme(text=element_text(size=16))
df = processedFaeces$mode2 %>%
mutate(Component_1 = model_ACMTF$Fac[[2]][,1]) %>%
arrange(Component_1) %>%
mutate(index=1:nrow(.)) %>%
mutate(Genus = gsub("g__", "", V7), Species = gsub("s__", "", V8)) %>%
mutate(Species = str_split_fixed(Species, "_", 2)[,2]) %>%
mutate(dplyr::across(.cols = Species, .fns = ~ dplyr::if_else(stringr::str_detect(.x, "sp."), "", .x))) %>%
mutate(dplyr::across(.cols = Species, .fns = ~ dplyr::if_else(stringr::str_detect(.x, "organism"), "", .x))) %>%
mutate(dplyr::across(.cols = Species, .fns = ~ dplyr::if_else(stringr::str_detect(.x, "bacterium"), "", .x))) %>%
filter(index %in% c(1:10, 84:93))
df[df$Genus == "" & df$Species == "", "Species"] = "Unknown"
df[df$Species == "", "Species"] = "sp."
df$name = paste0(df$Genus, " ", df$Species)
b = df %>%
ggplot(aes(x=Component_1,y=as.factor(index),fill=as.factor(V3))) +
geom_bar(stat="identity", col="black") +
xlab("Loading") +
ylab("") +
scale_y_discrete(labels=df$name) +
scale_fill_manual(name="Phylum", values=hue_pal()(5)[-c(4,5)], labels=c("Actinobacteriota", "Bacteroidota", "Firmicutes")) +
theme(text=element_text(size=16))
c = model_ACMTF$Fac[[3]][,1] %>%
as_tibble() %>%
mutate(value=-1*value) %>%
ggplot(aes(x=c(30,60,90),y=value)) +
geom_line() +
geom_point() +
xlab("Time point [day]") +
ylab("Loading") +
ylim(0,1) +
theme(text=element_text(size=16))
df = processedMilk$mode2 %>%
mutate(Component_1 = model_ACMTF$Fac[[4]][,1]) %>%
arrange(Component_1) %>%
mutate(index=1:nrow(.)) %>%
mutate(Genus = gsub("g__", "", V7), Species = gsub("s__", "", V8)) %>%
mutate(Species = str_split_fixed(Species, "_", 2)[,2]) %>%
mutate(dplyr::across(.cols = Species, .fns = ~ dplyr::if_else(stringr::str_detect(.x, "sp."), "", .x))) %>%
mutate(dplyr::across(.cols = Species, .fns = ~ dplyr::if_else(stringr::str_detect(.x, "organism"), "", .x))) %>%
mutate(dplyr::across(.cols = Species, .fns = ~ dplyr::if_else(stringr::str_detect(.x, "bacterium"), "", .x))) %>%
filter(index %in% c(1:10, 106:115))
df[df$Genus == "" & df$Species == "", "Species"] = "Unknown"
df[df$Species == "", "Species"] = "sp."
df$name = paste0(df$Genus, " ", df$Species)
d = df %>%
ggplot(aes(x=Component_1,y=as.factor(index),fill=as.factor(V3))) +
geom_bar(stat="identity", col="black") +
xlab("Loading") +
ylab("") +
scale_y_discrete(labels=df$name) +
scale_fill_manual(name="Phylum", values=hue_pal()(5)[-5], labels=c("Actinobacteriota", "Bacteroidota", "Firmicutes", "Proteobacteria")) +
theme(text=element_text(size=16))
e = model_ACMTF$Fac[[5]][,1] %>%
as_tibble() %>%
mutate(value=value*-1) %>%
ggplot(aes(x=c(0,30,60,90),y=value)) +
geom_line() +
geom_point() +
xlab("Time point [day]") +
ylab("Loading") +
ylim(0,1) +
theme(text=element_text(size=16))
temp = processedMilkMetab$mode2 %>%
mutate(Component_1 = model_ACMTF$Fac[[6]][,1]) %>%
arrange(Component_1) %>%
mutate(index=1:nrow(.)) %>%
filter(index %in% c(1:10, 61:70))
f=temp %>%
ggplot(aes(x=Component_1,y=as.factor(index),fill=as.factor(Class),pattern=as.factor(Class))) +
geom_bar_pattern(stat="identity",
colour="black",
pattern="stripe",
pattern_fill="black",
pattern_density=0.2,
pattern_spacing=0.05,
pattern_angle=45,
pattern_size=0.2) +
scale_y_discrete(label=temp$Metabolite) +
xlab("Loading") +
ylab("") +
scale_fill_manual(name="Class", values=hue_pal()(7)[-c(3,4,5)], labels=c("Amino acids and derivatives", "Energy related", "Oligosaccharides", "Sugars")) +
theme(text=element_text(size=16))
g = model_ACMTF$Fac[[7]][,1] %>%
as_tibble() %>%
mutate(value=value*-1) %>%
ggplot(aes(x=c(0,30,60,90),y=value)) +
geom_line() +
geom_point() +
xlab("Time point [day]") +
ylab("Loading") +
ylim(0,1) +
theme(text=element_text(size=16))
a







# Faeces
topIndices = processedFaeces$mode2 %>% mutate(index=1:nrow(.), Comp = model_ACMTF$Fac[[2]][,2]) %>% arrange(desc(Comp)) %>% head() %>% select(index) %>% pull()
bottomIndices = processedFaeces$mode2 %>% mutate(index=1:nrow(.), Comp = model_ACMTF$Fac[[2]][,2]) %>% arrange(desc(Comp)) %>% tail() %>% select(index) %>% pull()
Xhat = parafac4microbiome::reinflateTensor(model_ACMTF$Fac[[1]][,2], model_ACMTF$Fac[[2]][,2], model_ACMTF$Fac[[3]][,2])
print("Positive loadings:")
print(cor(Xhat[,topIndices[1],1], processedMilkMetab$mode1$Secretor))
print("Negative loadings:")
print(cor(Xhat[,bottomIndices[1],1], processedMilkMetab$mode1$Secretor))
print("Positive loadings:")
print(cor(Xhat[,topIndices[1],1], processedMilkMetab$mode1$BMI)) # flip
print("Negative loadings:")
print(cor(Xhat[,bottomIndices[1],1], processedMilkMetab$mode1$BMI))
# Milk
topIndices = processedMilk$mode2 %>% mutate(index=1:nrow(.), Comp = model_ACMTF$Fac[[4]][,2]) %>% arrange(desc(Comp)) %>% head() %>% select(index) %>% pull()
bottomIndices = processedMilk$mode2 %>% mutate(index=1:nrow(.), Comp = model_ACMTF$Fac[[4]][,2]) %>% arrange(desc(Comp)) %>% tail() %>% select(index) %>% pull()
Xhat = parafac4microbiome::reinflateTensor(model_ACMTF$Fac[[1]][,2], model_ACMTF$Fac[[4]][,2], model_ACMTF$Fac[[5]][,2])
print("Positive loadings:")
print(cor(Xhat[,topIndices[1],1], processedMilkMetab$mode1$Secretor))
print("Negative loadings:")
print(cor(Xhat[,bottomIndices[1],1], processedMilkMetab$mode1$Secretor))
print("Positive loadings:")
print(cor(Xhat[,topIndices[1],1], processedMilkMetab$mode1$BMI)) # flip
print("Negative loadings:")
print(cor(Xhat[,bottomIndices[1],1], processedMilkMetab$mode1$BMI))
# MilkMetab
topIndices = processedMilkMetab$mode2 %>% mutate(index=1:nrow(.), Comp = model_ACMTF$Fac[[6]][,2]) %>% arrange(desc(Comp)) %>% head() %>% select(index) %>% pull()
bottomIndices = processedMilkMetab$mode2 %>% mutate(index=1:nrow(.), Comp = model_ACMTF$Fac[[6]][,2]) %>% arrange(desc(Comp)) %>% tail() %>% select(index) %>% pull()
Xhat = parafac4microbiome::reinflateTensor(model_ACMTF$Fac[[1]][,2], model_ACMTF$Fac[[6]][,2], model_ACMTF$Fac[[7]][,2])
print("Positive loadings:")
print(cor(Xhat[,topIndices[1],1], processedMilkMetab$mode1$Secretor)) # NO FLIP BE CAREFUL
print("Negative loadings:")
print(cor(Xhat[,bottomIndices[1],1], processedMilkMetab$mode1$Secretor))
print("Positive loadings:")
print(cor(Xhat[,topIndices[1],1], processedMilkMetab$mode1$BMI)) # flip
print("Negative loadings:")
print(cor(Xhat[,bottomIndices[1],1], processedMilkMetab$mode1$BMI))
a = processedMilk$mode1 %>%
mutate(Component_1 = model_ACMTF$Fac[[1]][,2]) %>%
ggplot(aes(x=BMI,y=Component_1)) +
geom_point() +
stat_cor() +
xlab("ppBMI") +
ylab("Loading") +
theme(text=element_text(size=16))
df = processedFaeces$mode2 %>%
mutate(Component_1 = -1*model_ACMTF$Fac[[2]][,2]) %>%
arrange(Component_1) %>%
mutate(index=1:nrow(.)) %>%
mutate(Genus = gsub("g__", "", V7), Species = gsub("s__", "", V8)) %>%
mutate(Species = str_split_fixed(Species, "_", 2)[,2]) %>%
mutate(dplyr::across(.cols = Species, .fns = ~ dplyr::if_else(stringr::str_detect(.x, "sp."), "", .x))) %>%
mutate(dplyr::across(.cols = Species, .fns = ~ dplyr::if_else(stringr::str_detect(.x, "organism"), "", .x))) %>%
mutate(dplyr::across(.cols = Species, .fns = ~ dplyr::if_else(stringr::str_detect(.x, "bacterium"), "", .x))) %>%
filter(index %in% c(1:10, 84:93))
df[df$Genus == "" & df$Species == "", "Species"] = "Unknown"
df[df$Species == "", "Species"] = "sp."
df$name = paste0(df$Genus, " ", df$Species)
b = df %>%
ggplot(aes(x=Component_1,y=as.factor(index),fill=as.factor(V3))) +
geom_bar(stat="identity", col="black") +
xlab("Loading") +
ylab("") +
scale_y_discrete(labels=df$name) +
scale_fill_manual(name="Phylum", values=hue_pal()(5)[-5], labels=c("Actinobacteriota", "Bacteroidota", "Firmicutes", "Proteobacteria")) +
theme(text=element_text(size=16))
c = model_ACMTF$Fac[[3]][,2] %>%
as_tibble() %>%
mutate(value=value) %>%
ggplot(aes(x=c(30,60,90),y=value)) +
geom_line() +
geom_point() +
xlab("Time point [day]") +
ylab("Loading") +
ylim(0,1) +
theme(text=element_text(size=16))
df = processedMilk$mode2 %>%
mutate(Component_1 = -1*model_ACMTF$Fac[[4]][,2]) %>%
arrange(Component_1) %>%
mutate(index=1:nrow(.)) %>%
mutate(Genus = gsub("g__", "", V7), Species = gsub("s__", "", V8)) %>%
mutate(Species = str_split_fixed(Species, "_", 2)[,2]) %>%
mutate(dplyr::across(.cols = Species, .fns = ~ dplyr::if_else(stringr::str_detect(.x, "sp."), "", .x))) %>%
mutate(dplyr::across(.cols = Species, .fns = ~ dplyr::if_else(stringr::str_detect(.x, "organism"), "", .x))) %>%
mutate(dplyr::across(.cols = Species, .fns = ~ dplyr::if_else(stringr::str_detect(.x, "bacterium"), "", .x))) %>%
filter(index %in% c(1:10, 106:115))
df[df$Genus == "" & df$Species == "", "Species"] = "Unknown"
df[df$Species == "", "Species"] = "sp."
df$name = paste0(df$Genus, " ", df$Species)
d = df %>%
ggplot(aes(x=Component_1,y=as.factor(index),fill=as.factor(V3))) +
geom_bar(stat="identity", col="black") +
xlab("Loading") +
ylab("") +
scale_y_discrete(labels=df$name) +
scale_fill_manual(name="Phylum", values=hue_pal()(5)[-c(2,5)], labels=c("Actinobacteriota", "Firmicutes", "Proteobacteria")) +
theme(text=element_text(size=16))
e = model_ACMTF$Fac[[5]][,2] %>%
as_tibble() %>%
mutate(value=value) %>%
ggplot(aes(x=c(0,30,60,90),y=value)) +
geom_line() +
geom_point() +
xlab("Time point [day]") +
ylab("Loading") +
ylim(0,1) +
theme(text=element_text(size=16))
temp = processedMilkMetab$mode2 %>%
mutate(Component_1 = -1*model_ACMTF$Fac[[6]][,2]) %>%
arrange(Component_1) %>%
mutate(index=1:nrow(.)) %>%
filter(index %in% c(1:10, 61:70))
f=temp %>%
ggplot(aes(x=Component_1,y=as.factor(index),fill=as.factor(Class),pattern=as.factor(Class))) +
geom_bar_pattern(stat="identity",
colour="black",
pattern="stripe",
pattern_fill="black",
pattern_density=0.2,
pattern_spacing=0.05,
pattern_angle=45,
pattern_size=0.2) +
scale_y_discrete(label=temp$Metabolite) +
xlab("Loading") +
ylab("") +
scale_fill_manual(name="Class", values=hue_pal()(7)[-c(3,5)], labels=c("Amino acids and derivatives", "Energy related", "Food derived", "Oligosaccharides", "Sugars")) +
theme(text=element_text(size=16))
g = model_ACMTF$Fac[[7]][,2] %>%
as_tibble() %>%
mutate(value=value) %>%
ggplot(aes(x=c(0,30,60,90),y=value)) +
geom_line() +
geom_point() +
xlab("Time point [day]") +
ylab("Loading") +
ylim(0,1) +
theme(text=element_text(size=16))
a






