Ceabigr correlation matrices attempt

Genes-SNPs-methylation correlation first attempt for CEABIGR project

================

Load packages

library(ade4)
library(tidyverse)
library(Hmisc)
library(corrplot)
library(psych)
library(cowplot)

Load data

Gene expression

genes<-read_csv("output/52.1-rnaseq-relatedness/gene-distance-matrix.csv")

genes<-as.matrix(genes)

rownames(genes)<-genes[,1]

genes <- genes[,-1]

# Remove "S" from row names and column names
new_row_names <- gsub("S", "", rownames(genes))
new_col_names <- gsub("S", "", colnames(genes))

# Set the updated row and column names
rownames(genes) <- new_row_names
colnames(genes) <- new_col_names

#convert to numeric
char_to_numeric_matrix <- function(char_matrix) {
  numeric_matrix <- as.data.frame(char_matrix)  # Convert to data frame
  numeric_matrix <- as.matrix(sapply(numeric_matrix, as.numeric))  # Convert to numeric
  rownames(numeric_matrix) <- rownames(char_matrix)  # Restore row names
  colnames(numeric_matrix) <- colnames(char_matrix)  # Restore column names
  return(numeric_matrix)
}

genes <- char_to_numeric_matrix(genes)

str(genes)
##  num [1:26, 1:26] 0 146 390 417 397 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : chr [1:26] "12M" "13M" "16F" "19F" ...
##   ..$ : chr [1:26] "12M" "13M" "16F" "19F" ...
dim(genes)
## [1] 26 26
head(genes)
##          12M      13M      16F      19F      22F      23M      29F      31M
## 12M   0.0000 145.5063 390.4742 417.3537 397.2873 139.9290 295.6525 152.6554
## 13M 145.5063   0.0000 393.0197 416.8977 398.1907 147.9623 299.5315 160.6883
## 16F 390.4742 393.0197   0.0000 279.3173 250.4859 393.8593 205.4716 394.9736
## 19F 417.3537 416.8977 279.3173   0.0000 157.4585 418.9258 241.0171 420.6649
## 22F 397.2873 398.1907 250.4859 157.4585   0.0000 398.2755 209.6555 399.9205
## 23M 139.9290 147.9623 393.8593 418.9258 398.2755   0.0000 299.9497 155.2248
##          35F      36F      39F       3F      41F      44F      48M      50F
## 12M 400.9779 406.5213 419.6556 390.1470 325.1837 339.2259 136.1811 398.4761
## 13M 401.9517 408.1230 418.5719 390.5727 327.5388 340.9078 138.9828 399.2020
## 16F 247.5056 256.0873 255.1364 241.9767 228.1060 192.3941 391.0695 231.6528
## 19F 161.8560 162.5126 168.6290 166.0425 188.9144 220.8312 416.6590 170.3868
## 22F 143.3328 147.1271 158.9539 156.0276 173.9443 188.5986 396.5680 141.5667
## 23M 402.5628 408.1249 421.0592 392.0893 327.3064 339.2207 132.6171 400.5378
##          52F      53F      54F      59M      64M       6M      76F      77F
## 12M 302.3304 388.2059 327.9185 254.5311 195.7813 135.0665 370.7712 413.6977
## 13M 309.8153 389.2320 329.8300 261.9138 207.0109 138.7528 369.0968 411.6292
## 16F 250.6157 228.9607 227.4224 329.7437 343.0335 390.8418 265.2108 266.8318
## 19F 299.4613 169.0056 183.6896 340.6832 360.6826 415.2878 160.1907 154.0734
## 22F 274.7430 140.8063 160.3216 322.7154 340.0087 395.4580 155.2050 151.4431
## 23M 302.7814 390.6986 329.8332 247.6274 197.1889 136.8613 371.2288 414.8847
##           7M       9M
## 12M 137.8488 143.5715
## 13M 147.7513 149.1427
## 16F 389.2716 394.2081
## 19F 413.7472 421.0740
## 22F 393.6442 401.0017
## 23M 138.0533 140.3313

Methylation

methyl<-read.table(file="output/56-matrix-synergy/all.meth-distance.tab")

#convert to numeric

#rewrite here to fix format of column names 
methyl<-as.matrix(methyl, rownames=TRUE, colnames=TRUE)
methyl[1,1]<-""
rownames(methyl)<-methyl[,1]
colnames(methyl)<-methyl[1,]
methyl <- methyl[-1, -1]

methyl <- char_to_numeric_matrix(methyl)

str(methyl)
##  num [1:26, 1:26] 0 23304 30773 29630 30346 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : chr [1:26] "12M" "13M" "16F" "19F" ...
##   ..$ : chr [1:26] "12M" "13M" "16F" "19F" ...
dim(methyl)
## [1] 26 26
head(methyl)
##          12M      13M      16F      19F      22F      23M      29F      31M
## 12M     0.00 23304.01 30773.35 29629.70 30346.25 23098.58 31184.73 23129.98
## 13M 23304.01     0.00 31014.11 29879.93 30587.51 23344.65 30795.72 23356.35
## 16F 30773.35 31014.11     0.00 21908.45 22130.65 31511.83 21954.90 31138.59
## 19F 29629.70 29879.93 21908.45     0.00 21383.26 30272.65 21751.81 30048.86
## 22F 30346.25 30587.51 22130.65 21383.26     0.00 31126.69 21748.06 30843.14
## 23M 23098.58 23344.65 31511.83 30272.65 31126.69     0.00 31986.18 23189.74
##          35F      36F      39F       3F      41F      44F      48M      50F
## 12M 29283.62 29251.41 29579.97 31919.23 32063.83 31068.04 22942.88 30802.18
## 13M 29464.21 29474.21 29236.69 31853.28 32161.13 31178.57 22983.32 30925.90
## 16F 22231.28 22292.71 22057.29 22432.76 22808.30 21399.95 30990.10 22163.84
## 19F 21656.49 21511.49 21624.73 21911.40 22031.13 21324.27 29744.86 21635.20
## 22F 21939.90 21806.09 21906.64 22252.87 22128.82 21368.22 30423.65 21877.25
## 23M 29955.70 29935.49 30199.07 32447.18 32830.28 31540.12 22483.58 31438.81
##          52F      53F      54F      59M      64M       6M      76F      77F
## 12M 32363.67 29715.85 30263.18 23373.46 23109.80 22788.21 28416.24 29508.27
## 13M 32560.67 29916.97 30218.75 22750.43 23470.15 22821.71 28133.35 29423.39
## 16F 22081.44 22096.18 21707.26 31684.28 31088.76 31009.81 22821.71 22009.22
## 19F 22468.54 21609.50 21245.89 30506.55 29805.08 29855.33 21997.49 21394.53
## 22F 22479.71 21920.39 21462.58 31287.24 30607.09 30526.70 22735.05 21836.51
## 23M 33105.26 30529.01 30907.61 21719.67 23134.88 23003.41 28182.87 29941.71
##           7M       9M
## 12M 23029.43 22958.67
## 13M 23402.97 23244.69
## 16F 31265.45 31048.02
## 19F 30162.05 29908.63
## 22F 30724.11 30620.98
## 23M 23068.02 23214.31

SNPs

snp<-read.table(file = "output/53-revisit-epi-SNPs/epiMATRIX_mbd_rab.txt")

#rewrite here to fix format of column names 
snp<-as.matrix(snp, rownames=TRUE, colnames=TRUE)
snp[1,1]<-""
rownames(snp)<-snp[,1]
colnames(snp)<-snp[1,]
snp <- snp[-1, -1]

snp <- char_to_numeric_matrix(snp)

str(snp)
##  num [1:26, 1:26] 0 0.0518 0.0651 0.0749 0.0521 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : chr [1:26] "12M" "13M" "16F" "19F" ...
##   ..$ : chr [1:26] "12M" "13M" "16F" "19F" ...
dim(snp)
## [1] 26 26
head(snp)
##          12M      13M      16F      19F      22F      23M      29F      31M
## 12M 0.000000 0.051833 0.065087 0.074943 0.052083 0.056348 0.063667 0.057863
## 13M 0.051833 0.000000 0.045002 0.054090 0.036945 0.046627 0.086433 0.053412
## 16F 0.065087 0.045002 0.000000 0.069003 0.059445 0.050295 0.054638 0.055854
## 19F 0.074943 0.054090 0.069003 0.000000 0.056011 0.057057 0.052378 0.056290
## 22F 0.052083 0.036945 0.059445 0.056011 0.000000 0.046587 0.046753 0.032094
## 23M 0.056348 0.046627 0.050295 0.057057 0.046587 0.000000 0.045299 0.057947
##          35F      36F      39F       3F      41F      44F      48M      50F
## 12M 0.072606 0.067544 0.061042 0.042357 0.057715 0.067394 0.059371 0.055395
## 13M 0.056313 0.050231 0.084056 0.046292 0.049820 0.054259 0.057060 0.046225
## 16F 0.068856 0.073992 0.069586 0.044772 0.057536 0.067168 0.058939 0.054472
## 19F 0.071453 0.077480 0.066301 0.043394 0.063506 0.073212 0.062576 0.066056
## 22F 0.063189 0.056824 0.049644 0.037513 0.055742 0.052345 0.048149 0.057445
## 23M 0.051805 0.057087 0.050827 0.050026 0.044597 0.084425 0.068598 0.058827
##          52F      53F      54F      59M      64M       6M      76F      77F
## 12M 0.067694 0.060491 0.061507 0.050297 0.058001 0.059446 0.044517 0.058163
## 13M 0.050050 0.055199 0.066474 0.076069 0.051247 0.070837 0.063056 0.061562
## 16F 0.066497 0.058376 0.066752 0.043498 0.055569 0.057552 0.040993 0.060895
## 19F 0.070109 0.067405 0.069571 0.048032 0.054852 0.056932 0.041286 0.068516
## 22F 0.055745 0.052095 0.055375 0.026356 0.045929 0.049707 0.026188 0.048443
## 23M 0.056012 0.054487 0.056216 0.102967 0.059540 0.049079 0.085223 0.059623
##           7M       9M
## 12M 0.064160 0.055671
## 13M 0.052786 0.055597
## 16F 0.069088 0.062833
## 19F 0.071829 0.058750
## 22F 0.056014 0.041163
## 23M 0.055657 0.047079

Correlate Genes and Methylation

Correlate the gene expression and methylation matrix, return a matrix of correlation values.

genes.methyl.cor<-cor(genes, methyl)
genes.methyl.p<-rcorr(genes, methyl)

genes.methyl.cor<-corr.test(genes, methyl, method="spearman")

Plot this correlation.

genes.methyl.plot<-corrplot(genes.methyl.cor$r, p.mat = genes.methyl.cor$p, insig = "blank", hclust.method=c("complete"), type="full", col=COL2('PRGn', 10), order="hclust", pch.col="black", tl.col="black", title="Methylation ~ Gene Expression", mar = c(1, 1, 1, 1))

This plot shows that there is a positive relationship between distance in gene expression and distance in methylation between samples WITHIN each sex. This makes sense. For samples that are closely related in methylation, they are also closely related in gene expression (i.e., males are more like males and females are more like females). For samples that are not related in methylation, they are also not related in gene expression (i.e., males and females are super different).

Note that dots present show the correlation r value by color. Squares with a dot have p values <0.05. Blank squares have p values >0.05.

Run Mantel test

genes_dist<-as.dist(genes)
methyl_dist<-as.dist(methyl)

mantel.rtest(genes_dist, methyl_dist, nrepet = 99)
## Monte-Carlo test
## Call: mantel.rtest(m1 = genes_dist, m2 = methyl_dist, nrepet = 99)
## 
## Observation: 0.8428262 
## 
## Based on 99 replicates
## Simulated p-value: 0.01 
## Alternative hypothesis: greater 
## 
##       Std.Obs   Expectation      Variance 
##  1.292690e+01 -4.101257e-08  4.250965e-03
plot(r1 <- mantel.rtest(genes_dist,methyl_dist), main = "Mantel's test"); r1
## Monte-Carlo test
## Call: mantel.rtest(m1 = genes_dist, m2 = methyl_dist)
## 
## Observation: 0.8428262 
## 
## Based on 99 replicates
## Simulated p-value: 0.01 
## Alternative hypothesis: greater 
## 
##      Std.Obs  Expectation     Variance 
## 18.225092315 -0.004445709  0.002161256

Monte-Carlo test
Call: mantel.rtest(m1 = genes_dist, m2 = methyl_dist, nrepet = 99)

Observation: 0.8428262 

Based on 99 replicates
Simulated p-value: 0.01 
Alternative hypothesis: greater 

     Std.Obs  Expectation     Variance 
17.282653012 -0.002387819  0.002391732 

This shows that sample distance in methylation and sample distance in gene expression are significantly and highly positively correlated (r=0.843, p=0.01). 

Correlate Genes and SNP

Correlate the gene and snp matrix, return a matrix of correlation values.

genes.snp.cor<-cor(genes, snp)
genes.snp.p<-rcorr(genes, snp)
genes.snp.cor<-corr.test(genes, snp, method="spearman")

Plot this correlation.

genes.snp.plot<-corrplot(genes.snp.cor$r, p.mat = genes.snp.cor$p, insig = "blank", hclust.method=c("complete"), type="full", col=COL2('PRGn', 10), order="hclust", pch.col="black", tl.col="black", title="Gene Expression ~ Genetic Distance", mar = c(1, 1, 1, 1))

This plot shows that there is not a relationship between distance in gene expression and genetic distance in either sex. Gene expression is not related to genetic distance.

Run Mantel test

genes_dist<-as.dist(genes)
snp_dist<-as.dist(snp)

mantel.rtest(genes_dist, snp_dist, nrepet = 99)
## Monte-Carlo test
## Call: mantelnoneuclid(m1 = m1, m2 = m2, nrepet = nrepet)
## 
## Observation: -0.0530464 
## 
## Based on 99 replicates
## Simulated p-value: 0.78 
## Alternative hypothesis: greater 
## 
##       Std.Obs   Expectation      Variance 
## -0.9429996204 -0.0002315316  0.0031368180
plot(r2 <- mantel.rtest(genes_dist,snp_dist), main = "Mantel's test"); r2
## Monte-Carlo test
## Call: mantelnoneuclid(m1 = m1, m2 = m2, nrepet = nrepet)
## 
## Observation: -0.0530464 
## 
## Based on 99 replicates
## Simulated p-value: 0.86 
## Alternative hypothesis: greater 
## 
##      Std.Obs  Expectation     Variance 
## -0.943008109  0.008612015  0.004275174

Monte-Carlo test
Call: mantelnoneuclid(m1 = m1, m2 = m2, nrepet = nrepet)

Observation: -0.0530464 

Based on 99 replicates
Simulated p-value: 0.76 
Alternative hypothesis: greater 

     Std.Obs  Expectation     Variance 
-0.678945461 -0.010210686  0.003980542 

This shows that sample distance in gene expression and sample distance in snps are not related (r=-0.053, p=0.76).

Correlate SNP and Methylation

Correlate the methylation and snp matrix, return a matrix of correlation values. Generate matrix of correlation p values.

methyl.snp.cor<-cor(methyl, snp)
methyl.snp.p<-rcorr(methyl, snp)
methyl.snp.cor<-corr.test(methyl, snp, method="spearman")

Plot this correlation.

methyl.snp.plot<-corrplot(methyl.snp.cor$r, p.mat = methyl.snp.cor$p, insig = "blank", hclust.method=c("complete"), type="full", col=COL2('PRGn', 10), order="hclust", pch.col="black", tl.col="black", title="Methylation ~ Genetic Distance", mar = c(1, 1, 1, 1))

This plot shows that there is not a relationship between distance in methylation and genetic distance in either sex. Methylation is not related to genetic distance.

Run Mantel test

methyl_dist<-as.dist(methyl)
snp_dist<-as.dist(snp)

mantel.rtest(methyl_dist, snp_dist, nrepet = 99)
## Monte-Carlo test
## Call: mantelnoneuclid(m1 = m1, m2 = m2, nrepet = nrepet)
## 
## Observation: -0.1624565 
## 
## Based on 99 replicates
## Simulated p-value: 0.99 
## Alternative hypothesis: greater 
## 
##       Std.Obs   Expectation      Variance 
## -2.5400994085 -0.0004187274  0.0040694070
plot(r3 <- mantel.rtest(methyl_dist,snp_dist), main = "Mantel's test"); r3
## Monte-Carlo test
## Call: mantelnoneuclid(m1 = m1, m2 = m2, nrepet = nrepet)
## 
## Observation: -0.1624565 
## 
## Based on 99 replicates
## Simulated p-value: 0.99 
## Alternative hypothesis: greater 
## 
##      Std.Obs  Expectation     Variance 
## -2.700162408 -0.006303361  0.003344427

Monte-Carlo test
Call: mantelnoneuclid(m1 = m1, m2 = m2, nrepet = nrepet)

Observation: -0.1624565 

Based on 99 replicates
Simulated p-value: 0.99 
Alternative hypothesis: greater 

     Std.Obs  Expectation     Variance 
-2.411299545 -0.006336771  0.004191923

This shows that between sample distance in methylation and between sample distance in snps are not related (r=-0.162, p=0.99).

Written on September 29, 2023