मुझे इन भूखंडों के समान, संबंधित फेनोटाइप के खिलाफ महत्वपूर्ण एसएनपी कोड (श्रेणीबद्ध) के लिए एक बार प्लॉट तैयार करने की आवश्यकता है:

my favorite Barplot

मैंने आर में कई तरह की कोशिश की और कुछ परिणाम प्राप्त किए लेकिन मैं अपना पसंदीदा परिणाम प्राप्त करने के लिए मैदान में हूं। यहाँ कोड और परिणाम हैं:

### डेटा

SNP_code <- as.factor(c("GG","GA","AA","GA","GA","GG","GG","GG","GG","GA","GA","AA","GA","GA","GA","GG","GG","GG","GG","AA","GG","GG","GG","GG","AA","GG","GG","GA","GG","AA","GA","GG","GG","GG","GG","GG","GG","AA","GG","GA","GG","GG","GA","GG","GG","GA","GG","GG","GA","GA","GG","GA","GG","GA","GA","GA","GA","GA","GA","GG","GG","GG","AA","GA","GA","GA","GA","GG","GA","GG","GG","GG","GA","GA","GA","GG","GG","GA","GG","AA","GG","GG","GG","AA"))

EBV <- c(0.06663,-0.03031,-0.122,-0.02021,-0.1157,-0.08131,-0.02034,-0.06324,0.06699,-0.062,0.02736,-0.1201,-0.04846,-0.06934,-0.06023,-0.009244,-0.05648,-0.01908,0.06728,-0.06517,0.08534,0.07618,-0.0814,0.06113,-0.0795,0.1055,0.08305,0.1209,-0.05314,-0.09431,0.05185,0.1347,0.1591,0.08777,0.08326,0.1612,0.09528,-0.1002,0.1561,-0.09327,0.09474,0.1356,0.06384,0.1585,0.03235,0.1081,0.1462,-0.04082,-0.05042,0.01793,-0.1157,-0.1165,-0.009399,-0.02311,-0.108,-0.1143,0.07219,0.01376,-0.05059,-0.052,0.08494,-0.0388,-0.06346,0.07789,0.02961,-0.1126,0.1102,0.133,-0.09317,-0.1181,0.1584,0.122,0.1019,-0.04074,-0.01178,0.09523,-0.03266,-0.01258,-0.0231,-0.08259,0.05823,-0.02894,-0.008242,0.07981)

LS <- c(2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,2,1,2,1,1,2,1,2,2,2,1,1,1,2,2,2,2,2,1,1,2,1,2,2,2,1,2,2,2,1,1,2,1,1,1,1,1,1,1,1,1,1,2,1,1,2,1,1,2,2,1,1,2,1,2,1,1,2,1,1,1,1,1,1,1,2)

IDs <- c(1033,1081,1106,1107,1120,1194,1199,1326,1334,1340,1345,1358,1398,1404,1405,1421,1457,1459,1464,1509,1529,1542,1550,2025,2030,2095,2099,2128,2141,2153,2167,2224,2232,2238,2244,2266,2271,2280,2283,2323,2326,2337,2369,2390,2391,2396,851012,851016,851021,851055,851063,851084,851105,851109,851146,851169,851176,851198,851205,851246,851266,851292,851332,851345,851488,851489,851509,851528,851531,851547,851562,851573,851574,851578,851584,851588,851592,851622,851651,851670,851672,851684,851690,861086)

sig_snp <- data.frame(IDs, SNP_code, EBV, LS)

### प्रसरण विश्लेषण और माध्य तुलना

library(dplyr)
### for LS
group_by(sig_snp, SNP_code) %>%
  summarise(
    count = n(),
    mean = mean(LS, na.rm = TRUE),
    sd = sd(LS, na.rm = TRUE))

### for EBV
group_by(sig_snp, SNP_code) %>%
  summarise(
    count = n(),
    mean = mean(EBV, na.rm = TRUE),
    sd = sd(EBV, na.rm = TRUE))

# Compute the analysis of variance
Anova.fit <- aov(EBV ~ SNP_code, data = sig_snp)
summary(Anova.fit)
# Tukey multiple pairwise-comparisons
TukeyHSD(Anova.fit) 
# or
library(multcomp)
summary(glht(Anova.fit, linfct = mcp(SNP_code = "Tukey")))

### EBV के लिए बॉक्स प्लॉट (वास्तव में मुझे LS और EBV के लिए बारप्लॉट चाहिए)

library(ggplot2)
library(ggpval)

plot <- ggplot(sig_snp, aes(SNP_code, EBV)) +
geom_boxplot(fill=c("red","blue", "green"), color="black", width=.7); plot
add_pval(plot, pairs = list(c(1, 3)), test='wilcox.test')
add_pval(plot, pairs = list(c(2, 3)), test='wilcox.test')
add_pval(plot, pairs = list(c(1, 2)), test='wilcox.test')

Boxplot

"add_pval" केवल "wilcox.test" और "t.test" का उपयोग करता है, लेकिन मैं तुकी को पसंद करता हूं। किसी भी मदद की सराहना की जाती है।

1
Mehdi Esmaeilifard 11 जिंदा 2020, 14:31

1 उत्तर

सबसे बढ़िया उत्तर

नीचे दिए गए कोड में सुधार के लिए निश्चित रूप से जगह है, लेकिन कम से कम यह आपको वर्कफ़्लो का एक उदाहरण देता है जिसका उपयोग आप अपना "पसंदीदा" बारप्लॉट प्राप्त करने के लिए कर सकते हैं:

भाग ए: बरचार्ट

1) हम ईबीवी या एलएस के कार्य में प्रत्येक एसएनपी के माध्य के साथ डेटाफ्रेम प्राप्त करने के लिए sig_snp को फिर से व्यवस्थित करते हैं।

library(tidyverse)
DF1 <- sig_snp %>% 
  pivot_longer(., cols = c(EBV,LS), names_to = "Variable", values_to = "Values") %>%
  group_by(SNP_code, Variable) %>%
  summarise(Mean = mean(Values),
            SEM = sd(Values) / sqrt(n()),
            Nb = n()) %>%
  rowwise() %>%
  mutate(Labels = as.character(SNP_code)) %>%
  mutate(Labels = paste(unlist(strsplit(Labels,"")),collapse = "/")) %>%
  mutate(Labels = paste0(Labels,"\nn = ",Nb))

# A tibble: 6 x 6
  SNP_code Variable    Mean    SEM    Nb Labels       
  <fct>    <chr>      <dbl>  <dbl> <int> <chr>        
1 AA       EBV      -0.0719 0.0202     9 "A/A\nn = 9" 
2 AA       LS        1.11   0.111      9 "A/A\nn = 9" 
3 GA       EBV      -0.0141 0.0134    31 "G/A\nn = 31"
4 GA       LS        1.23   0.0763    31 "G/A\nn = 31"
5 GG       EBV       0.0422 0.0126    44 "G/G\nn = 44"
6 GG       LS        1.48   0.0762    44 "G/G\nn = 44"

labels कॉलम को बाद में x-अक्ष की लेबलिंग के लिए फिर से उपयोग किया जाएगा।

2) फिर, हम कुल माध्य की गणना करने जा रहे हैं (जो "मीन" बार खींचने में मदद करेगा) :

library(tidyverse)
DF2 <- sig_snp  %>% 
  pivot_longer(., cols = c(EBV,LS), names_to = "Variable", values_to = "Values") %>%
  group_by(Variable) %>%
  summarise(Mean = mean(Values),
            SEM = sd(Values) / sqrt(n()),
            Nb = n()) %>%
  mutate(SNP_code = "All") %>%
  select(SNP_code, Variable, Mean, SEM, Nb) %>%
  rowwise() %>%
  mutate(Labels = paste0("Mean\nn = ",Nb))

# A tibble: 2 x 6
  SNP_code Variable    Mean     SEM    Nb Labels        
  <chr>    <chr>      <dbl>   <dbl> <int> <chr>         
1 All      EBV      0.00918 0.00944    84 "Mean\nn = 84"
2 All      LS       1.35    0.0522     84 "Mean\nn = 84"

3) हम DF1 और DF2 दोनों को बाध्य कर रहे हैं और हम सही प्लॉटिंग ऑर्डर प्राप्त करने के लिए SNP_code के स्तरों को फिर से व्यवस्थित करते हैं:

library(tidyverse)
DF <- bind_rows(DF1, DF2)
DF$Labels = factor(DF$Labels,levels= c("Mean\nn = 84",
                                       "A/A\nn = 9",
                                       "G/A\nn = 31",
                                       "G/G\nn = 44" ))

4) अब, हम इसे प्लॉट कर सकते हैं:

library(ggplot2)
ggplot(DF, aes(x = SNP_code, y = Mean, fill = SNP_code))+
  geom_bar(stat = "identity", show.legend = FALSE)+
  geom_errorbar(aes(ymin = Mean-SEM, ymax = Mean+SEM), width = 0.2)+
  facet_wrap(.~Variable, scales = "free")+
  scale_x_discrete(name = "",labels = levels(DF$Labels))

enter image description here

भाग B: बारचार्ट पर आंकड़े जोड़ना

आंकड़े जोड़ने के लिए, आप ggsignif पैकेज से geom_signif फ़ंक्शन का उपयोग कर सकते हैं जो बाहरी आउटपुट से आंकड़े जोड़ने की अनुमति देता है।

1) सबसे पहले EBV पर Tukey टेस्ट के आउटपुट के लिए डेटाफ्रेम बनाएं:

Anova.fit <- aov(EBV ~ SNP_code, data = sig_snp)
t <- TukeyHSD(Anova.fit)
stat <- t$SNP_code
Stat_EBV <- stat %>% as.data.frame() %>% 
  mutate(Variable = "EBV") %>% 
  mutate(Group = rownames(stat)) %>%
  rowwise() %>%
  mutate(Group1 = unlist(strsplit(Group,"-"))[1]) %>%
  mutate(Group2 = unlist(strsplit(Group,"-"))[2]) %>%
  mutate(labels = round(`p adj`,4)) 

Stat_EBV$y_pos <- c(0.06,0.08,0.1)

२) LS के तुकी परीक्षण के लिए एक ही बात:

Anova.fit <- aov(LS ~ SNP_code, data = sig_snp)
t <- TukeyHSD(Anova.fit)
stat <- t$SNP_code
Stat_LS <- stat %>% as.data.frame() %>% 
  mutate(Variable = "LS") %>% 
  mutate(Group = rownames(stat)) %>%
  rowwise() %>%
  mutate(Group1 = unlist(strsplit(Group,"-"))[1]) %>%
  mutate(Group2 = unlist(strsplit(Group,"-"))[2]) %>%
  mutate(labels = round(`p adj`,4)) 
Stat_LS$y_pos = c(1.7,1.9,2.1)

3) दोनों आँकड़े डेटाफ़्रेम की बाइंडिंग:

library(tidyverse)
STAT <- bind_rows(Stat_EBV,Stat_LS)

# A tibble: 6 x 10
    diff      lwr   upr  `p adj` Variable Group Group1 Group2 labels y_pos
   <dbl>    <dbl> <dbl>    <dbl> <chr>    <chr> <chr>  <chr>   <dbl> <dbl>
1 0.0578 -0.0130  0.129 0.132    EBV      GA-AA GA     AA     0.132   0.06
2 0.114   0.0457  0.183 0.000431 EBV      GG-AA GG     AA     0.0004  0.08
3 0.0563  0.0125  0.100 0.00821  EBV      GG-GA GG     GA     0.0082  0.1 
4 0.115  -0.303   0.532 0.790    LS       GA-AA GA     AA     0.790   1.7 
5 0.366  -0.0373  0.770 0.0832   LS       GG-AA GG     AA     0.0832  1.9 
6 0.251  -0.00716 0.510 0.0585   LS       GG-GA GG     GA     0.0585  2.1

४) बारचार्ट प्राप्त करें और आँकड़ों के परिणाम जोड़ें:

library(ggplot2)
library(ggsignif)
ggplot(DF, aes(x = SNP_code, y = Mean, fill = SNP_code))+
  geom_bar(stat = "identity", show.legend = FALSE)+
  geom_errorbar(aes(ymin = Mean-SEM, ymax = Mean+SEM), width = 0.2)+
  geom_signif(inherit.aes = FALSE, data = STAT, 
              aes(xmin=Group1, xmax=Group2, annotations=labels, y_position=y_pos),
              manual = TRUE)+
  facet_wrap(.~Variable, scales = "free")+
  scale_x_discrete(name = "",labels = levels(DF$Labels))

enter image description here

मुझे आशा है कि यह वही दिखता है जो आप उम्मीद कर रहे हैं।

2
dc37 12 जिंदा 2020, 02:20