Histogram with “negative” logarithmic scale in R

前端 未结 4 1998
名媛妹妹
名媛妹妹 2020-12-08 12:40

I have a dataset with some outliers, such as the following

x <- rnorm(1000,0,20)
x <- c(x, 500, -500)

If we plot this on a linear x a

4条回答
  •  轻奢々
    轻奢々 (楼主)
    2020-12-08 13:05

    I found a way to cheat on it. I say "cheat", because it actually plot negative and positive parts of the data separately. Thus you can not compare the negative and positive data. But only can show the distribution of negative and positive parts separately.

    And one of the problem is if there is zero values in your data, it will not be shown in the plot.

    reverselog_trans <- function(base = exp(1)) {
      trans <- function(x) -log(x, base)
      inv <- function(x) base^(-x)
      trans_new(paste0("reverselog-", format(base)), trans, inv, 
                log_breaks(base = base), 
                domain = c(1e-100, Inf))
    }
    
    quartz();
    
    
    dist1 <- ggplot(data=df.meltFUAC) +
      geom_point(alpha=1,aes(x=deltaU.deltaUltrasensitivity,y=deltaF.deltaFitness, 
                             colour=deltaF.w_c)) + 
      scale_x_continuous(name = expression(Delta * S[ult]), 
                         limits=c(1e-7,1),trans = "log10",breaks=c(1e-01,1e-03,1e-05),
                         labels=c("1e-01","1e-03","1e-05")) + 
      scale_y_continuous(name = expression(paste(Delta, " Fitness")),trans = "log10", 
                         limits = c(1e-7,1), breaks=c(1e-01,1e-03,1e-05),
                         labels=c("1e-01","1e-03","1e-05")) +
      theme_bw() +
      theme(legend.position = "none", axis.title.x=element_blank(),strip.background=element_blank(),
            panel.border=element_rect(colour = "black"),panel.grid.major=element_blank(),
            panel.grid.minor=element_blank(),plot.background=element_blank(),
            plot.margin=unit(c(0,0,0,-11),"mm"))
    
    dist2 <- ggplot(data=df.meltFUAC, aes(x=-deltaU.deltaUltrasensitivity,y=deltaF.deltaFitness, 
                                          colour=deltaF.w_c)) +
      geom_point(alpha=1) + 
      scale_x_continuous(name = expression(Delta * sqrt(S[ult] %.% S[amp])),limits=c(1,1e-7),
                         trans = reverselog_trans(10),breaks=c(1e-01,1e-03,1e-05),
                         labels=c("-1e-01","-1e-03","-1e-05")) +
      scale_y_continuous(name = expression(paste(Delta, " Fitness")),trans = "log10", 
                         limits = c(1e-7,1), breaks=c(1e-01,1e-03,1e-05),
                         labels=c("1e-01","1e-03","1e-05")) +
      theme_bw() +
      theme(legend.position = "none",strip.background=element_blank(),panel.border=element_rect(colour = "black"),
            axis.text.y=element_blank(), axis.ticks.y=element_blank(), axis.title.y=element_blank(),
            axis.line.y=element_line(colour="black",size=1,linetype="solid"),axis.title.x=element_blank(),
            panel.grid.major=element_blank(),panel.grid.minor=element_blank(),plot.background=element_blank(),
            plot.margin=unit(c(0,-8,0,2.5),"mm"))
    
    hist0 <- ggplot(data=df.meltFUAC, aes(deltaF.deltaFitness,fill=deltaF.w_c)) +
      #geom_histogram(alpha = 0.5, aes(y=..density..),position = 'identity') +
      geom_density(alpha = 0.5, aes(colour=deltaF.w_c)) +
      scale_x_continuous(name = expression(paste(Delta, " Fitness")), 
                         limits=c(1e-7,1),trans = "log10",breaks=c(1e-01,1e-03,1e-05),
                         labels=c("1e-01","1e-03","1e-05")) + 
      scale_y_continuous(name = "Density", limits=c(0,0.6)) + 
      theme_bw() +
      theme(legend.position = "none", axis.title.x=element_blank(),strip.background=element_blank(),
            axis.text.y=element_blank(), axis.ticks.y=element_blank(), axis.title.y=element_blank(),
            axis.text.x=element_blank(), axis.ticks.x=element_blank(), axis.title.x=element_blank(),
            panel.border=element_rect(colour = "black"),panel.grid.major=element_blank(),
            panel.grid.minor=element_blank(),plot.background=element_blank(),
            plot.margin=unit(c(0,5,2.5,-2.5),"mm")) +
      coord_flip()
    
    hist1 <- ggplot(data=df.meltFUAC, aes(deltaU.deltaUltrasensitivity,fill=deltaF.w_c)) +
      #geom_histogram(alpha = 0.5, aes(y=..density..),position = 'identity') +
      geom_density(alpha = 0.5, aes(colour=deltaF.w_c)) +
      scale_x_continuous(name = expression(Delta * S[ult]), 
                         limits=c(1e-7,1),trans = "log10",breaks=c(1e-01,1e-03,1e-05),
                         labels=c("1e-01","1e-03","1e-05")) + 
      scale_y_continuous(name = "Density", limits=c(0,0.6)) + 
      theme_bw() +
      theme(legend.position = "none", axis.title.x=element_blank(),strip.background=element_blank(),
            axis.text.y=element_blank(), axis.ticks.y=element_blank(), axis.title.y=element_blank(),
            axis.text.x=element_blank(), axis.ticks.x=element_blank(), axis.title.x=element_blank(),
            axis.line.x=element_line(colour="black",size=1,linetype="solid"),
            panel.border=element_rect(colour = "black"),panel.grid.major=element_blank(),
            panel.grid.minor=element_blank(),plot.background=element_blank(),
            plot.margin=unit(c(5,0,-2.5,2),"mm"))
    
    hist2 <- ggplot(data=df.meltFUAC, aes(-deltaU.deltaUltrasensitivity,fill=deltaF.w_c)) +
      #geom_histogram(alpha = 0.5, aes(y=..density..),position = 'identity') +
      geom_density(alpha = 0.5, aes(colour=deltaF.w_c)) +
      scale_x_continuous(name = expression(Delta * S[ult]),limits=c(1,1e-7),
                         trans = reverselog_trans(10),breaks=c(1e-01,1e-03,1e-05),
                         labels=c("-1e-01","-1e-03","-1e-05")) +
      scale_y_continuous(name = "Density", limits=c(0,0.6)) + 
      theme_bw() +
      theme(legend.position = "none",strip.background=element_blank(),panel.border=element_rect(colour = "black"),
            axis.text.y=element_blank(), axis.ticks.y=element_blank(), axis.title.y=element_blank(),
            axis.text.x=element_blank(), axis.ticks.x=element_blank(), axis.title.x=element_blank(),
            axis.line.y=element_line(colour="black",size=1,linetype="solid"),
            axis.line.x=element_line(colour="black",size=1,linetype="solid"),
            panel.grid.major=element_blank(),panel.grid.minor=element_blank(),plot.background=element_blank(),
            plot.margin=unit(c(5,-8,-2.5,2.5),"mm"))
    
    
    
    grid.newpage();
    pushViewport(viewport(layout = grid.layout(3, 3, widths = unit(c(4,4,2),"null"),
                                               heights=unit(c(2,7.5,0.5),"null"))));
    vplayout <- function(x, y) viewport(layout.pos.row = x, layout.pos.col = y);
    
    print(dist2, vp = vplayout(2, 1));
    print(dist1, vp = vplayout(2, 2));
    print(hist2, vp = vplayout(1, 1));
    print(hist1, vp = vplayout(1, 2));
    print(hist0, vp = vplayout(2, 3));
    grid.text(expression(Delta * Ultrasensitivity),vp = vplayout(3,1:2),x = unit(0.55, "npc"), 
              y = unit(0.9, "npc"),gp=gpar(fontsize=12, col="black"));
    
    dev.copy2pdf(file=sprintf("%s/_dist/dist_hist_deltaF_deltaU_wc_01vs10.pdf", resultDir));
    dev.off();
    

    Here is the graph it got (but you need to manually to put the legend on):

    enter image description here

    Or a simpler one:

    reverselog_trans <- function(base = exp(1)) {
      trans <- function(x) -log(x, base)
      inv <- function(x) base^(-x)
      trans_new(paste0("reverselog-", format(base)), trans, inv, 
                log_breaks(base = base), 
                domain = c(1e-100, Inf))
    }
    
    quartz();
    
    hist1 <- ggplot(deltaF, aes(deltaFitness,fill=w_c)) + guides(fill=guide_legend(title=expression(omega[c]))) + geom_histogram(alpha = 0.5, aes(y=..density..),position = 'identity') + geom_density(alpha = 0.05, aes(colour=w_c)) + scale_x_continuous(name = expression(paste(Delta, " Fitness")),trans = "log10");
    hist1 <- hist1 + scale_y_continuous(name = "Density", limits=c(0,1));
    #hist1 <- hist1 + theme(panel.background=element_blank(),panel.border=element_blank(),axis.line.x=element_blank(),axis.line.y=element_line(colour="black",linetype="solid",size=1),axis.title.x=element_blank(),panel.grid.major=element_blank(),panel.grid.minor=element_blank(),plot.background=element_blank(),plot.margin=unit(c(5,5,0,5),"mm"));
    hist1 <- hist1 + theme_bw();
    hist1 <- hist1 + theme(strip.background=element_blank(),panel.border=element_rect(colour = "black"),axis.title.x=element_blank(),panel.grid.major=element_blank(),panel.grid.minor=element_blank(),plot.background=element_blank(),plot.margin=unit(c(5,5,0,5),"mm"));
    hist1 <- hist1 + scale_color_discrete(name=expression(omega[c]));# + geom_vline(xintercept=0, colour="grey", size = 1);# + geom_hline(yintercept=0, colour="grey", size = 0.5);
    
    hist2 <- ggplot(deltaU, aes(deltaUltrasensitivity,fill=w_c)) + geom_histogram(alpha = 0.5, aes(y=..density..),position = 'identity') + geom_density(alpha = 0.05, aes(colour=w_c))  + scale_x_continuous(name = expression(paste(Delta, " Ultrasensitivity")), limits=c(1e-7,1),trans = "log10",breaks=c(1e-01,1e-03,1e-05),labels=c("1e-01","1e-03","1e-05"));
    hist2 <- hist2 + scale_y_continuous(name = "Density",limits=c(0,1)) ;#+ geom_vline(xintercept=0, colour="grey", size = 1);# + geom_hline(yintercept=0, colour="grey", size = 0.5);
    #hist2 <- hist2 + theme(legend.position = "none", axis.title.x=element_blank(),panel.background=element_blank(),panel.border=element_blank(),panel.grid.major=element_blank(),panel.grid.minor=element_blank(),plot.background=element_blank(),plot.margin=unit(c(0,5,0,-7.5),"mm"));
    hist2 <- hist2 + theme_bw();
    hist2 <- hist2 + theme(legend.position = "none", axis.title.x=element_blank(),strip.background=element_blank(),panel.border=element_rect(colour = "black"),panel.grid.major=element_blank(),panel.grid.minor=element_blank(),plot.background=element_blank(),plot.margin=unit(c(0,5,0,-7.5),"mm"));
    # + ggtitle("Positive part")
    
    hist3 <- ggplot(deltaU, aes(-deltaUltrasensitivity,fill=w_c)) + geom_histogram(alpha = 0.5, aes(y=..density..),position = 'identity') + geom_density(alpha = 0.05, aes(colour=w_c)) + scale_x_continuous(name = expression(paste(Delta, " Ultrasensitivity")), limits=c(1,1e-7),trans = reverselog_trans(10),breaks=c(1e-01,1e-03,1e-05),labels=c("-1e-01","-1e-03","-1e-05"));
    hist3 <- hist3 + scale_y_continuous(name = "Density", limits=c(0,1));# + geom_hline(yintercept=0, colour="black", size = 0.5);
    #hist3 <- hist3 + theme(legend.position = "none",panel.background=element_blank(),axis.text.y=element_blank(), axis.ticks.y=element_blank(), axis.title.y=element_blank(),axis.line.y=element_line(colour="black",size=1,linetype="solid"),axis.title.x=element_blank(),panel.grid.major=element_blank(),panel.grid.minor=element_blank(),plot.background=element_blank(),plot.margin=unit(c(0,-7.5,0,5),"mm"));
    hist3 <- hist3 + theme_bw();
    hist3 <- hist3 + theme(legend.position = "none",strip.background=element_blank(),panel.border=element_rect(colour = "black"),axis.text.y=element_blank(), axis.ticks.y=element_blank(), axis.title.y=element_blank(),axis.line.y=element_line(colour="black",size=1,linetype="solid"),axis.title.x=element_blank(),panel.grid.major=element_blank(),panel.grid.minor=element_blank(),plot.background=element_blank(),plot.margin=unit(c(0,-7.5,0,5),"mm"));
    # + ggtitle("Negative part")
    
    grid.newpage();
    pushViewport(viewport(layout = grid.layout(4, 2, widths = unit(c(5,5),"null"),heights=unit(c(4.6,0.4,4.6,0.4),"null"))));
    vplayout <- function(x, y) viewport(layout.pos.row = x, layout.pos.col = y);
    print(hist1, vp = vplayout(1, 1:2));  # key is to define vplayout
    grid.text(expression(paste(Delta, " Fitness")),vp = vplayout(2,1:2),x = unit(0.5, "npc"), y = unit(0.9, "npc"),gp=gpar(fontsize=12, col="black"));
    print(hist3, vp = vplayout(3, 1));
    print(hist2, vp = vplayout(3, 2));
    grid.text(expression(paste(Delta, " Ultrasensitivity")),vp = vplayout(4,1:2),x = unit(0.5, "npc"), y = unit(0.9, "npc"),gp=gpar(fontsize=12, col="black"));
    
    
    dev.copy2pdf(file=sprintf("%s/deltaF_deltaU_wc_01vs10.pdf", resultDir));
    dev.off();
    

    Here is the graph I got:

    enter image description here

提交回复
热议问题