Use R to recreate contour plot made in Igor

前端 未结 1 1935
失恋的感觉
失恋的感觉 2020-12-16 06:18

This contour plot, made with the Igor program, is popular in atmospheric chemistry and pollution studies:

I\'m trying to recreate it with R for a friend who

相关标签:
1条回答
  • 2020-12-16 06:42

    I can get a lot closer to the Igor plot using akima::interp instead of loess for the interpolation:

    # read in the data
    dat <- read.csv("contour_plot_data.csv")
    
    # focus on the untransformed values
    dat <- dat[, 1:108]
    
    # get Diameter value from col names
    Diameter <- as.numeric(gsub("X", "", names(dat)[-1]))
    
    # melt data into long format
    # see http://www.cookbook-r.com/Manipulating_data/Converting_data_between_wide_and_long_format/
    library(tidyr)
    dat_long <- gather(dat, "Diameter", "dN_dlogDp", 2:108)
    
    # we want diameter as a numeric
    dat_long$Diameter <- as.numeric(gsub("X", "", dat_long$Diameter ))
    # we want time as a date-formatted variable
    x <-  as.character(dat_long$Time)
    date_ <- as.Date(x, format = "%d/%m/%Y") 
    time_ <- gsub(" ", "", substr(x, nchar(x) - 4, nchar(x)))
    dat_long$Time <- as.POSIXct(paste0(date_, " ", time_))
    
    # The Igor plot seems to use log dN_dlogDp values, so let's get those
    dat_long$dN_dlogDp_log <- log10(dat_long$dN_dlogDp)
    dat_long$dN_dlogDp_log <- ifelse(dat_long$dN_dlogDp_log == "NaN" |
                                       dat_long$dN_dlogDp_log == "-Inf"  , 0, dat_long$dN_dlogDp_log)
    
    
    # interpolate between the  values for a smoother contour
    # this takes a moment or two...
    
    library(akima)
    xo <- with(dat_long, seq(min(Time), max(Time), 120))
    yo <- with(dat_long, seq(min(Diameter), max(Diameter), 0.5))
    dat_interp <- with(dat_long, interp(Time, Diameter, dN_dlogDp_log, xo = xo, yo = yo) )
    
    # get on with plotting...
    
    # make into a data frame for ggplot
    dat_interp_df <-  data.frame(matrix(data = dat_interp$z, ncol = length(dat_interp$y), nrow = length(dat_interp$x)))
    names(dat_interp_df) <- dat_interp$y
    dat_interp_df$Time <- as.POSIXct(dat_interp$x, origin = "1970-01-01")
    
    # wide to long
    dat_interp_df_long <- gather(dat_interp_df, "Diameter", "dN_dlogDp_log", 1:(ncol(dat_interp_df)-1))
    dat_interp_df_long$Diameter <- as.numeric(as.character(dat_interp_df_long$Diameter))
    
    # plot
    library(ggplot2) 
    library(scales)
    y_labels_breaks <- seq(0, max(Diameter), 100)
    ggplot(dat_interp_df_long, aes(y = Diameter, x = Time,  fill = dN_dlogDp_log)) +
      geom_raster(interpolate = TRUE)  +
      scale_fill_gradientn(name=expression(log(dN/dlogD[p])), colours = rev(rainbow(50))) +
      scale_y_continuous(expand = c(0,0), breaks = y_labels_breaks ) +
      scale_x_datetime(expand = c(0,0), breaks = date_breaks("1 day"))
    

    But there is still quite a big difference in the colour mapping, with the Igor plot having wide bands with sharp boundaries, and my plot has fewer colour bands and fuzzy boundaries between them. So I guess I don't quite have the interpolation method that the Igor plot uses.

    UPDATE after experimenting with a bunch of colour ramps, I've found a pretty good match in colorRamps::blue2green2red. I've also put a bit of effort here into fancy tickmarks:

    # plot
    library(ggplot2) 
    library(scales) # for date_breaks
    library(colorRamps) # for blue2green2red
    
    # function for minor tick marks
    every_nth <- function(x, nth, empty = TRUE, inverse = FALSE) 
    {
      if (!inverse) {
        if(empty) {
          x[1:nth == 1] <- ""
          x
        } else {
          x[1:nth != 1]
        }
      } else {
        if(empty) {
          x[1:nth != 1] <- ""
          x
        } else {
          x[1:nth == 1]
        }
      }
    }
    
    # add tick marks every two hours
    start_date <- min(dat_interp_df_long$Time)
    end_date <-  max(dat_interp_df_long$Time)
    date_breaks_2h <-  seq(from = start_date, to = end_date, by = "2 hours")
    date_breaks_1_day <- seq(from = start_date, to = end_date, by = "1 day")
    multiple <- length(date_breaks_2h) / length(date_breaks_1_day)
    
    insert_minor <- function(major_labs, n_minor) {labs <- 
      c( sapply( major_labs, function(x) c(x, rep("", multiple) ) ) )
    labs[1:(length(labs)-n_minor)]}
    
    
    y_labels_breaks <- seq(0, max(Diameter), 100)
    mytheme <- theme_bw(base_size = 14) +  theme(aspect.ratio = 1/5)
    ggplot(dat_interp_df_long, aes(y = Diameter, x = Time,  fill = dN_dlogDp_log)) +
      geom_raster(interpolate = TRUE)  +
      scale_fill_gradientn(name=expression(log(dN/dlogD[p])), colours = blue2green2red(100)) +
      scale_y_continuous(expand = c(0,0), 
                         labels = every_nth(y_labels_breaks, 2, inverse = TRUE),
                         breaks = y_labels_breaks) +
      scale_x_datetime(expand = c(0,0), 
                       breaks=date_breaks_2h, 
                       labels=insert_minor(format(date_breaks_1_day, "%d %b"),  
                                           length(date_breaks_1_day))) +
      xlab("Day and time") +
      ylab("Diameter (nm)") +
      mytheme
    

    The green-blue gradient is still a bit different from the Igor plot. I have very little green at all! Perhaps further experimentation with colour ramps might improve the match there.

    To get the y-axis on a log scale, some additional effort is required. We have to use geom_rect and adjust the sizes of each rectangle to fit in the log scale:

    ##################  y-axis with log scale ###########################
    # get visually diminishing axis ticks
    base_breaks <- function(n = 10){
      function(x) {
        axisTicks(log10(range(x, na.rm = TRUE)), log = TRUE, n = n)
      }
    }
    
    # Now with log axis, we need to replace the ymin and ymax
    distance <- diff((unique(dat_interp_df_long$Diameter)))/2
    upper <- (unique(dat_interp_df_long$Diameter)) + c(distance, distance[length(distance)])
    lower <- (unique(dat_interp_df_long$Diameter)) - c(distance[1], distance) 
    
    # Create xmin, xmax, ymin, ymax
    dat_interp_df_long$xmin <- dat_interp_df_long$Time - 1000 # default of geom_raster is 0.5
    dat_interp_df_long$xmax <- dat_interp_df_long$Time + 1000
    idx <- rle(dat_interp_df_long$Diameter)$lengths[1]
    dat_interp_df_long$ymin <- unlist(lapply(lower, function(i) rep(i, idx)))
    dat_interp_df_long$ymax <- unlist(lapply(upper, function(i) rep(i, idx)))
    
    
    ggplot(dat_interp_df_long, aes(y = Diameter, x = Time, 
                                   xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax, 
                                   fill = dN_dlogDp_log)) +
      geom_rect()  +
      scale_fill_gradientn(name=expression(log(dN/dlogD[p])), colours = blue2green2red(1000)) +
      scale_y_continuous(expand = c(0,0), 
                         trans = log_trans(), breaks = base_breaks()) +
      scale_x_datetime(expand = c(0,0), 
                       breaks=date_breaks_2h, 
                       labels=insert_minor(format(date_breaks_1_day, "%d %b"),  
                                           length(date_breaks_1_day))) +
      xlab("Day and time") +
      ylab("Diameter (nm)") +
      mytheme
    

    UPDATE After some experimentation with colour ramps, I've found a pretty close match:

    # adjust the colour ramp to match the Igor plot (their colour ramp is pretty uneven! lots of red and blue, it seems.)
    colfunc <- colorRampPalette(c( rep("red", 3), 
                                   rep("yellow", 1), 
                                   rep("green", 2), 
                                   "cyan", 
                                   rep("blue", 3), 
                                   "purple"))
    y_labels_breaks <- seq(0, max(Diameter), 100)
    mytheme <- theme_bw(base_size = 14) +  theme(aspect.ratio = 1/5)
    ggplot(dat_interp_df_long, aes(y = Diameter, x = Time,  fill = dN_dlogDp_log)) +
      geom_raster(interpolate = TRUE)  +
      scale_fill_gradientn(name=expression(log(dN/dlogD[p])), colours = rev(colfunc(100))) +
      scale_y_continuous(expand = c(0,0), 
                         labels = every_nth(y_labels_breaks, 2, inverse = TRUE),
                         breaks = y_labels_breaks) +
      scale_x_datetime(expand = c(0,0), 
                       breaks=date_breaks_2h, 
                       labels=insert_minor(format(date_breaks_1_day, "%d %b"),  
                                           length(date_breaks_1_day))) +
      xlab("Day and time") +
      ylab("Diameter (nm)") +
      mytheme
    

    The code from this post is also at https://gist.github.com/benmarwick/9a54cbd325149a8ff405

    UPDATE I've now made a package that will produce these plots: https://github.com/benmarwick/smps

    0 讨论(0)
提交回复
热议问题