Efficient rendering of data points from large data plot in Shiny

心不动则不痛 提交于 2020-08-08 05:14:21

问题


Goal

Implement a Shiny app to efficiently visualize and adjust uploaded data sets. Each set may contain 100000 to 200000 rows. After data adjustments are done, the adjusted data can be downloaded. In steps:

  1. Data upload
  2. Data selection and visualization
  3. Data (point) removal
  4. Download option

Issue

While the app works in principal, data visualization and removal take too much time.

Code

Sample data

Some sample data is generated. The data can be uploaded onto the shiny app. The sample data distribution is not similar to my actual data. The actual data contains clearly identifiable outliers and looks like a spectra with peaks.

a = sample(1:1e12, 1e5, replace=TRUE)
b = sample(1:1e12, 1e5, replace=TRUE)
dummy1 = data.frame(Frequency = a, Amplitude = a)
dummy2 = data.frame(Frequency = b, Amplitude = b)
dummy3 = data.frame(Frequency = a, Amplitude = b)
# Sample data
write.csv(dummy1,'dummy1.csv')
write.csv(dummy2,'dummy2.csv')
write.csv(dummy3,'dummy2.csv')

Shiny app

The app takes the uploaded data and plots it. (Sample dummy data can be uploaded onto the app.) Section of data points can be removed and the new data can be downloaded.

# Packages
library(shiny)
library(ggplot2)
library(data.table)
# UI
ui = fluidPage(
    fluidRow(selectInput("selection", "Set Selection:", choices = '', selected = '', multiple = TRUE)),
    fluidRow(plotOutput(outputId = "plot", brush = "plot_brush_"), 
             downloadButton('download',"Download the data"))
)

# Server
server = function(session, input, output){
    # Pop up for data upload
    query_modal = modalDialog(title = "Upload Spectrum",
                              fileInput("file", 
                              "file",
                              multiple = TRUE,
                              accept = c(".csv")),
                              easyClose = FALSE)
    showModal(query_modal)

    ## Upload
    mt1 = reactive({
       req(input$file)
       cs = list()
       for(nr in 1:length(input$file[ , 1])){
          c = read.csv(input$file[[nr, 'datapath']])
          cs[[nr]] = data.table(Frequency = as.numeric(c[[1]]), 
                                Amplitude = as.numeric(c[[2]]), 
                                Indicator = as.factor(nr))}
        c = do.call(rbind, cs)
        c = reactiveValues(data = c)
        return(c)})

    ## Input selection
    observeEvent(
      mt1(),
      updateSelectInput(
        session, 
        "selection", 
        "Set Selection:", 
        choices = levels(mt1()$data$Indicator), 
        selected = 'Entire'))
    
    ## Plot
    output$plot <- renderPlot({
      mt = mt1()$data
      mt = mt[mt$Indicator %in% input$selection,]
      p = ggplot(mt, aes(Frequency, Amplitude, color = Indicator)) 
      p + geom_point(show.legend = TRUE)})
    
    ## Download
    output$download = downloadHandler(
      filename = function(){paste(gsub('.{1}$', '', input$file$name[1]), 'manipulated', '.csv', sep= '')}, 
      content = function(fname){
        mt = mt1()$data
        mt = mt[, .SD, .SDcols= c('Frequency', 
                                  'Amplitude', 
                                  'Indicator')]
        write.csv(mt, fname, row.names = FALSE)})
    
    ## Adjust
    observe({
      d = mt$data
      keep = mt$data[!Indicator %in% input$selection]
      df = brushedPoints(d, brush = input$plot_brush_, allRows = TRUE) 
      df = df[selected_ == FALSE]
      df$selected_ = NULL
      mt$data = rbind(keep , df[Indicator %in% input$selection,  ])})
}

# Run app
shinyApp(ui = ui, server = server)

回答1:


You could use matplotlib Python drawing library inside R and Shiny with the reticulate package :

  1. Set up the package and the libraries :
install.packages('reticulate')

# Install python environment
reticulate::install_miniconda() 
# if Python is already installed, you can specify the path with use_python(path)

# Install matplotlib library
reticulate::py_install('matplotlib')
  1. test installation :
library(reticulate)
mpl <- import("matplotlib")
mpl$use("Agg") # Stable non interactive backend
mpl$rcParams['agg.path.chunksize'] = 0 # Disable error check on too many points

plt <- import("matplotlib.pyplot")
np <- import("numpy")

# generate lines cloud
xx = np$random$randn(100000L)
yy = np$random$randn(100000L)

plt$figure()
plt$plot(xx,yy)
plt$savefig('test.png')
plt$close(plt$gcf())

test.png :

  1. Use matplotlib in Shiny, drawing duration below 2 seconds for 1e5 segments :
# Packages
library(shiny)
library(ggplot2)
library(data.table)
# UI
ui = fluidPage(
  fluidRow(selectInput("selection", "Set Selection:", choices = '', selected = '', multiple = TRUE)),
  fluidRow(imageOutput(outputId = "image"), 
           downloadButton('download',"Download the data"))
)

# Server
server = function(session, input, output){
  
  # Setup Python objects
  mpl <- reticulate::import("matplotlib")
  plt <- reticulate::import("matplotlib.pyplot")
  mpl$use("Agg") 
  mpl$rcParams['agg.path.chunksize'] = 0
  
  
  # Pop up for data upload
  query_modal = modalDialog(title = "Upload Spectrum",
                            fileInput("file", 
                                      "file",
                                      multiple = TRUE,
                                      accept = c(".csv")),
                            easyClose = FALSE)
  showModal(query_modal)
  
  ## Upload
  mt1 = reactive({
    req(input$file)
    cs = list()
    for(nr in 1:length(input$file[ , 1])){
      c = read.csv(input$file[[nr, 'datapath']])
      cs[[nr]] = data.table(Frequency = as.numeric(c[[1]]), 
                            Amplitude = as.numeric(c[[2]]), 
                            Indicator = as.factor(nr))}
    c = do.call(rbind, cs)
    c = reactiveValues(data = c)
    return(c)})
  
  ## Input selection
  observeEvent(
    mt1(),
    updateSelectInput(
      session, 
      "selection", 
      "Set Selection:", 
      choices = levels(mt1()$data$Indicator), 
      selected = 'Entire'))
  
  ## Render matplotlib image
  output$image <- renderImage({
    # Read myImage's width and height. These are reactive values, so this
    # expression will re-run whenever they change.
    width  <- session$clientData$output_image_width
    height <- session$clientData$output_image_height
    
    # For high-res displays, this will be greater than 1
    pixelratio <- session$clientData$pixelratio
    
    # A temp file to save the output.
    outfile <- tempfile(fileext='.png')
    
    # Generate the image file
    mt = mt1()$data
    mt = mt[mt$Indicator %in% input$selection,]
    xx = mt$Frequency
    yy = mt$Amplitude
    
    plt$figure()
    plt$plot(xx,yy)
    plt$savefig(outfile)
    plt$close(plt$gcf())
    
    # Return a list containing the filename
    list(src = outfile,
         width = width,
         height = height,
         alt = "This is alternate text")
  }, deleteFile = TRUE)
  
  ## Download
  output$download = downloadHandler(
    filename = function(){paste(gsub('.{1}$', '', input$file$name[1]), 'manipulated', '.csv', sep= '')}, 
    content = function(fname){
      mt = mt1()$data
      mt = mt[, .SD, .SDcols= c('Frequency', 
                                'Amplitude', 
                                'Indicator')]
      write.csv(mt, fname, row.names = FALSE)})
  
  ## Adjust
  observe({
    mt = mt1()
    df = brushedPoints(mt$data, brush = input$plot_brush_, allRows = TRUE) 
    mt$data = df[df$selected_ == FALSE,  ]})
}

# Run app
shinyApp(ui = ui, server = server)

You'll need to handle color manually, because matplotlib isn't ggplot2



来源:https://stackoverflow.com/questions/63056501/efficient-rendering-of-data-points-from-large-data-plot-in-shiny

标签
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!