Skip to contents

Introduction

This article presents the CP results of the AP dataset discussed in Chapter 7 of my dissertation.

Preamble

## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## 
## Attaching package: 'CMTFtoolbox'
## The following object is masked from 'package:NPLStoolbox':
## 
##     npred
## The following objects are masked from 'package:parafac4microbiome':
## 
##     fac_to_vect, reinflateFac, reinflateTensor, vect_to_fac
testMetadata = function(model, comp, metadata){

  transformedSubjectLoadings = model$Fac[[1]][,comp]
  transformedSubjectLoadings = transformedSubjectLoadings / norm(transformedSubjectLoadings, "2")
  
  metadata = metadata$mode1 %>% left_join(ageInfo, by="SubjectID") %>% mutate(Gender=as.numeric(as.factor(Gender), PainS_NopainA=as.numeric(as.factor(PainS_NopainA))))
  result = lm(transformedSubjectLoadings ~ Gender + Age + PainS_NopainA, data=metadata)
  
  # Extract coefficients and confidence intervals
  coef_estimates <- summary(result)$coefficients
  conf_intervals <- confint(result)
  
  # Remove intercept
  coef_estimates <- coef_estimates[rownames(coef_estimates) != "(Intercept)", ]
  conf_intervals <- conf_intervals[rownames(conf_intervals) != "(Intercept)", ]
  
  # Combine into a clean data frame
  summary_table <- data.frame(
    Term     = rownames(coef_estimates),
    Estimate = coef_estimates[, "Estimate"] * 1e3,
    CI       = paste0(
                 conf_intervals[, 1], " – ",
                 conf_intervals[, 2]
               ),
    P_value  = coef_estimates[, "Pr(>|t|)"],
    P_adjust = p.adjust(coef_estimates[, "Pr(>|t|)"], "BH"),
    row.names = NULL
  )
  
  return(summary_table)
}
cytokines_meta_data = read.csv("./AP/input_deduplicated_metadata_RvdP.csv", sep=" ", header=FALSE) %>% as_tibble()
colnames(cytokines_meta_data) = c("SubjectID", "Visit", "Gender", "Age", "Pain_noPain", "case_control")
ageInfo = cytokines_meta_data %>% select(SubjectID, Age) %>% unique()
# Case only subcohort
processedCytokines_case = CMTFtoolbox::Georgiou2025$Inflammatory_mediators

mask = processedCytokines_case$mode1$case_control == "case"
processedCytokines_case$data = processedCytokines_case$data[mask,,]
processedCytokines_case$mode1 = processedCytokines_case$mode1[mask,]

# Also remove subject A11-8 due to being an outlier
processedCytokines_case$data = processedCytokines_case$data[-25,,]
processedCytokines_case$mode1 = processedCytokines_case$mode1[-25,]

processedCytokines_case$data = log(processedCytokines_case$data + 0.005)
processedCytokines_case$data = multiwayCenter(processedCytokines_case$data, 1)
processedCytokines_case$data = multiwayScale(processedCytokines_case$data, 2)
colourCols = c("", "", "")
legendTitles = c("", "", "")
xLabels = c("Subject index", "Feature index", "Time index")
legendColNums = c(2,5,0)
arrangeModes = c(FALSE, FALSE, FALSE)
continuousModes = c(FALSE,FALSE,TRUE)

Inflammatory mediators

CP was applied to capture the dominant source of variation within the longitudinally measured inflammatory mediator data block. The model selection procedure indicates that CORCONDIA scores were inconclusive and loadings were unstable using four CP components. The commented code below performs the procedure, and is not rerun here due to computational limitations.

# assessment_cytokines_case = parafac4microbiome::assessModelQuality(processedCytokines_case$data, numRepetitions=10, numCores=10)
# assessment_cytokines_case$plots$overview
# assessment_cytokines_case$metrics$CORCONDIA %>% as_tibble() %>% mutate(index=1:nrow(.)) %>% pivot_longer(-index) %>% ggplot(aes(x=as.factor(name),y=value)) + geom_boxplot() + xlab("Number of components") + ylab("CORCONDIA") + theme(text=element_text(size=16))

# stability_cytokines = parafac4microbiome::assessModelStability(processedCytokines_case, maxNumComponents=5, numFolds=10, colourCols=colourCols, legendTitles=legendTitles, xLabels = xLabels, legendColNums=legendColNums, arrangeModes=arrangeModes, numCores=parallel::detectCores())
# stability_cytokines$modelPlots[[2]]
# stability_cytokines$modelPlots[[3]]
# stability_cytokines$modelPlots[[4]]
# stability_cytokines$modelPlots[[5]]
# cp_cytokines_case = parafac4microbiome::parafac(processedCytokines_case$data, nfac=3, nstart=100)
cp_cytokines_case = readRDS("./AP/cp_cytokines_case.RDS")

cp_cytokines_case$varExp
## [1] 36.56742

Therefore, a three-component CP model was selected, explaining 36.6% of the variation. MLR analysis revealed that the first CP component captured variation associated with subject gender, while the second component was not interpretable, and the third component captured variation associated with a mixture of age and pain perception.

testMetadata(cp_cytokines_case, 1, processedCytokines_case)
##             Term   Estimate                                         CI
## 1         Gender 164.949044     0.0072276098315647 – 0.322670478266166
## 2            Age  -1.993531 -0.00749714837460696 – 0.00351008731927873
## 3 PainS_NopainAS  90.764635    -0.0677608834293364 – 0.249290152970497
##      P_value  P_adjust
## 1 0.04120853 0.1236256
## 2 0.45964329 0.4596433
## 3 0.24705950 0.3705893
testMetadata(cp_cytokines_case, 2, processedCytokines_case)
##             Term   Estimate                                        CI   P_value
## 1         Gender -10.645881    -0.188082738359188 – 0.166790975410106 0.9018900
## 2            Age  -3.055744 -0.00924732264771653 – 0.0031358344938848 0.3164014
## 3 PainS_NopainAS  31.654457    -0.146686995485999 – 0.209995909725848 0.7157353
##   P_adjust
## 1  0.90189
## 2  0.90189
## 3  0.90189
testMetadata(cp_cytokines_case, 3, processedCytokines_case)
##             Term    Estimate                                         CI
## 1         Gender   97.432563    -0.0307466851714757 – 0.225611811849372
## 2            Age   -8.817799 -0.0132905554470071 – -0.00434504207134558
## 3 PainS_NopainAS -141.890668   -0.270723390978273 – -0.0130579456514781
##        P_value    P_adjust
## 1 0.1288756411 0.128875641
## 2 0.0005117721 0.001535316
## 3 0.0324479469 0.048671920
# Check sign
topIndices = processedCytokines_case$mode2 %>% mutate(index=1:nrow(.), Comp = cp_cytokines_case$Fac[[2]][,1]) %>% arrange(desc(Comp)) %>% head() %>% select(index) %>% pull()
bottomIndices = processedCytokines_case$mode2 %>% mutate(index=1:nrow(.), Comp = cp_cytokines_case$Fac[[2]][,1]) %>% arrange(desc(Comp)) %>% tail() %>% select(index) %>% pull()

Xhat = parafac4microbiome::reinflateTensor(cp_cytokines_case$Fac[[1]][,1], cp_cytokines_case$Fac[[2]][,1], cp_cytokines_case$Fac[[3]][,1])

print("Positive loadings:")
## [1] "Positive loadings:"
print(cor(Xhat[,topIndices[1],2], as.numeric(as.factor(processedCytokines_case$mode1$Gender)), use="pairwise.complete.obs")) # flip
## [1] -0.4193432
print("Negative loadings:")
## [1] "Negative loadings:"
print(cor(Xhat[,bottomIndices[5],2], as.numeric(as.factor(processedCytokines_case$mode1$Gender)), use="pairwise.complete.obs"))
## [1] 0.4193432
# Plot
a = processedCytokines_case$mode1 %>%
  mutate(Comp1 = cp_cytokines_case$Fac[[1]][,1]) %>%
  ggplot(aes(x=as.factor(Gender),y=Comp1)) +
  geom_boxplot() +
  geom_jitter(height=0, width=0.05) +
  stat_compare_means(comparisons=list(c("F", "M")),label="p.signif") +
  xlab("") +
  ylab("Loading") +
  scale_x_discrete(labels=c("Female", "Male")) +
  theme(text=element_text(size=16))

temp = processedCytokines_case$mode2 %>%
  as_tibble() %>%
  mutate(Component_1 = -1*cp_cytokines_case$Fac[[2]][,1]) %>%
  arrange(Component_1) %>%
  mutate(index=1:nrow(.))

b=temp %>%
  ggplot(aes(x=as.factor(index),y=Component_1)) +
  geom_bar(stat="identity",col="black") +
  scale_x_discrete(label=temp$name) +
  xlab("") +
  ylab("Loading") +
  scale_x_discrete(labels=c("IL-10", "OPG", "VEGF", "CRP", expression(IFN-gamma), "IL-6", expression(TNF-alpha), "GM-CSF", "IL-17A", "IL-8", "IL-12p70", expression(IL-1*alpha), expression(IL-1*beta), expression(MIP-1*alpha), "IL-4", "RANKL")) +
  theme(text=element_text(size=16),axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
c = cp_cytokines_case$Fac[[3]][,1] %>% as_tibble() %>% mutate(value=-1*value) %>% mutate(timepoint=c(-6,-3,0,1,6,13)) %>% ggplot(aes(x=timepoint,y=value)) + geom_line() + geom_point() + xlab("Time point [weeks]") + ylab("Loading") + ylim(0,1) + theme(text=element_text(size=16))

a

b

c

# Check sign
topIndices = processedCytokines_case$mode2 %>% mutate(index=1:nrow(.), Comp = cp_cytokines_case$Fac[[2]][,3]) %>% arrange(desc(Comp)) %>% head() %>% select(index) %>% pull()
bottomIndices = processedCytokines_case$mode2 %>% mutate(index=1:nrow(.), Comp = cp_cytokines_case$Fac[[2]][,3]) %>% arrange(desc(Comp)) %>% tail() %>% select(index) %>% pull()

Xhat = parafac4microbiome::reinflateTensor(cp_cytokines_case$Fac[[1]][,3], cp_cytokines_case$Fac[[2]][,3], cp_cytokines_case$Fac[[3]][,3])

print("Positive loadings:")
## [1] "Positive loadings:"
print(cor(Xhat[,topIndices[1],2], processedCytokines_case$mode1 %>% left_join(ageInfo) %>% select(Age), use="pairwise.complete.obs")) # no flip
## Joining with `by = join_by(SubjectID)`
##            Age
## [1,] 0.5797891
print("Negative loadings:")
## [1] "Negative loadings:"
print(cor(Xhat[,bottomIndices[5],2], processedCytokines_case$mode1 %>% left_join(ageInfo) %>% select(Age), use="pairwise.complete.obs"))
## Joining with `by = join_by(SubjectID)`
##             Age
## [1,] -0.5797891
# Plot
a = processedCytokines_case$mode1 %>%
  left_join(ageInfo) %>%
  mutate(Comp1 = cp_cytokines_case$Fac[[1]][,3]) %>%
  ggplot(aes(x=Age,y=Comp1)) +
  geom_point() +
  stat_cor() +
  xlab("Age") +
  ylab("Loading") +
  theme(text=element_text(size=16))
## Joining with `by = join_by(SubjectID)`
temp = processedCytokines_case$mode2 %>%
  as_tibble() %>%
  mutate(Component_1 = cp_cytokines_case$Fac[[2]][,3]) %>%
  arrange(Component_1) %>%
  mutate(index=1:nrow(.))

b=temp %>%
  ggplot(aes(x=as.factor(index),y=Component_1)) +
  geom_bar(stat="identity",col="black") +
  scale_x_discrete(label=temp$name) +
  xlab("") +
  ylab("Loading") +
  scale_x_discrete(labels=c("IL-10", "IL-17A", "GM-CSF", "IL-12p70", expression(IFN-gamma), expression(IL-1*alpha), "RANKL", expression(IL-1*beta), "CRP", "OPG", expression(MIP-1*alpha), "IL-6", expression(TNF-alpha), "VEGF", "IL-8")) +
  theme(text=element_text(size=16),axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
c = cp_cytokines_case$Fac[[3]][,3] %>% as_tibble() %>% mutate(value=-1*value) %>% mutate(timepoint=c(-6,-3,0,1,6,13)) %>% ggplot(aes(x=timepoint,y=value)) + geom_line() + geom_point() + xlab("Time point [weeks]") + ylab("Loading") + ylim(0,1) + theme(text=element_text(size=16))

a

b

c