Highlight all connected paths from start to end in Sankey graph using R

前端 未结 2 1321
Happy的楠姐
Happy的楠姐 2020-12-05 15:18

I want to highlight the whole path when i click on the node to know the whole story of specific node and here\'s an example- http://bl.ocks.org/git-ashish/8959771 .

2条回答
  •  一生所求
    2020-12-05 16:20

    Given the R code data structure you provided...

    First, sankeyNetwork expects data that lists edges/links and the nodes that are connected by those links. Your data has a... let's call it a "traveler"-centric format, where each row of your data is related to a specific "path". So first you need to convert that data into the type of data that sankeyNetwork needs, while retaining the information needed to identify links to the path they came from. Additionally, your data only has one city in it, so it will be hard to see the result unless there's at least two different origins for the paths in your data, so I'll duplicate it and attribute the second set to a different city. Here's an example of that...

    library(tidyverse)
    
    # duplicate the data for another city so we have more than 1 origin
    links <-
      df %>%
      full_join(mutate(df, City = "Hong Kong")) %>%
      mutate(row = row_number()) %>%
      mutate(origin = .[[1]]) %>%
      gather("column", "source", -row, -origin) %>%
      mutate(column = match(column, names(df))) %>%
      arrange(row, column) %>%
      group_by(row) %>%
      mutate(target = lead(source)) %>%
      ungroup() %>%
      filter(!is.na(target)) %>%
      select(source, target, origin) %>%
      group_by(source, target, origin) %>%
      summarise(count = n()) %>%
      ungroup()
    
    nodes <- data.frame(name = unique(c(links$source, links$target)))
    links$source <- match(links$source, nodes$name) - 1
    links$target <- match(links$target, nodes$name) - 1
    

    Now you have a links and nodes data frame in the form that sankeyNetwork expects, and the links data frame has an extra column origin that identifies which city each link is on the path from. You can now plot this with sankeyNetwork, add back in the origin data since it gets stripped out, and then use htmlwidgets::onRender to assign a click behavior that changes the opacity of any link whose origin is the city node that was clicked...

    library(networkD3)
    library(htmlwidgets)
    
    sn <- sankeyNetwork(Links = links, Nodes = nodes, Source = 'source',
                        Target = 'target', Value = 'count', NodeID = 'name')
    
    # add origin back into the links data because sankeyNetwork strips it out
    sn$x$links$origin <- links$origin
    
    
    # add onRender JavaScript to set the click behavior
    htmlwidgets::onRender(
      sn,
      '
      function(el, x) {
        var nodes = d3.selectAll(".node");
        var links = d3.selectAll(".link");
        nodes.on("mousedown.drag", null); // remove the drag because it conflicts
        nodes.on("click", clicked);
        function clicked(d, i) {
          links
            .style("stroke-opacity", function(d1) {
                return d1.origin == d.name ? 0.5 : 0.2;
              });
        }
      }
      '
    )
    

    Here is a simplified version of the above answer (with a smaller example dataset) which keeps each "path" separate, rather than aggregating like paths and incrementing a count/Value variable.

    library(dplyr)
    library(tidyr)
    library(networkD3)
    library(htmlwidgets)
    
    df <- read.csv(header = T, as.is = T, text = '
    name,origin,layover,destination
    Bob,Baltimore,Chicago,Los Angeles
    Bob,Baltimore,Chicago,Seattle
    Bob,New York,St Louis,Austin
    Bob,New York,Chicago,Seattle
    Tom,Baltimore,Chicago,Los Angeles
    Tom,New York,St Louis,San Diego
    Tom,New York,Chicago,Seattle
    Tom,New York,New Orleans,Austin
    ')
    
    links <-
      df %>%
      mutate(row = row_number()) %>%
      mutate(traveler = .[[1]]) %>%
      gather("column", "source", -row, -traveler) %>%
      mutate(column = match(column, names(df))) %>%
      arrange(row, column) %>%
      group_by(row) %>%
      mutate(target = lead(source)) %>%
      ungroup() %>%
      filter(!is.na(target)) %>%
      select(source, target, traveler) %>%
      group_by(source, target, traveler) %>%
      summarise(count = n()) %>%
      ungroup()
    
    nodes <- data.frame(name = unique(c(links$source, links$target)))
    links$source <- match(links$source, nodes$name) - 1
    links$target <- match(links$target, nodes$name) - 1
    
    sn <- sankeyNetwork(Links = links, Nodes = nodes, Source = 'source',
                        Target = 'target', Value = 'count', NodeID = 'name')
    
    # add origin back into the links data because sankeyNetwork strips it out
    sn$x$links$traveler <- links$traveler
    
    # add onRender JavaScript to set the click behavior
    htmlwidgets::onRender(
      sn,
      '
      function(el, x) {
        var nodes = d3.selectAll(".node");
        var links = d3.selectAll(".link");
        nodes.select("rect").style("cursor", "pointer");
        nodes.on("mousedown.drag", null); // remove the drag because it conflicts
        //nodes.on("mouseout", null);
        nodes.on("click", clicked);
        function clicked(d, i) {
          links
            .style("stroke-opacity", function(d1) {
                return d1.traveler == d.name ? 0.5 : 0.2;
              });
        }
      }
      '
    )
    

提交回复
热议问题