plot line width (size) based on counts using ggplot or any other method in R

最后都变了- 提交于 2019-12-06 11:19:15

If you are looking for user-specific counts of paths, then this might help:

library(dplyr)
library(ggplot2)

# The pipeline below relies on the columns user_id, step and milestone.
ddnew <- read.csv("https://raw.github.com/bossaround/question/master/data9.csv")

ddnew <- ddnew %>%
  # Sort first so a user's path signature does not depend on the row order
  # of the CSV.
  arrange(user_id, step) %>%
  group_by(user_id) %>%
  # Collapse each user's steps / milestones into one signature string.
  mutate(
    step_id = paste(step, collapse = ","),
    milestone_id = paste(milestone, collapse = ",")
  ) %>%
  group_by(step_id, milestone_id) %>%
  # Count users per path. n() would count rows (users x steps per path) and
  # make longer paths look wider than shorter ones with the same user count.
  mutate(width = n_distinct(user_id)) %>%
  ungroup()

# One line per user; line width is the number of users sharing that path.
ggplot(ddnew, aes(x = step, y = milestone, group = user_id)) +
  geom_line(aes(size = width)) +
  scale_x_discrete(limits = as.character(0:9)) +
  scale_y_discrete(limits = as.character(0:9))

The idea is to count unique user-specific paths and assign these counts as width in the geom_line() aesthetic.

Does this help?

library(ggplot2)

# Load the step/milestone records used by the question.
ddnew <- read.csv(
  file = "https://raw.github.com/bossaround/question/master/data9.csv"
)

# One translucent grey line per user_id. stat_summary sums the y values at
# each x within a group, and the line size is mapped to milestone.
ggplot(
  data = ddnew,
  mapping = aes(x = step, y = milestone, group = user_id)
) +
  stat_summary(
    mapping = aes(size = milestone),
    geom = "line",
    fun.y = "sum",
    alpha = 0.2,
    color = "grey50"
  ) +
  scale_x_discrete(limits = factor(0:2)) +
  scale_y_discrete(limits = factor(0:10)) +
  # Plain background and no legend.
  theme(
    panel.background = element_blank(),
    legend.position = "none"
  )

One option is to use the riverplot package. First you'll need to summarize your data so that you can define the edges and nodes.

> library(riverplot)
> library(dplyr)  # provides %>% and count()
> library(tidyr)  # provides spread()
> 
> # Spread steps into columns (one column per step value), then tally how
> # often each combination of milestones across steps 1-3 occurs.
> paths <- spread(ddnew, step, milestone) %>%
+   count(`1`, `2`, `3`)
> paths
Source: local data frame [9 x 4]
Groups: 1, 2 [?]

    `1`   `2`   `3`     n
  <int> <int> <int> <int>
1     1     2     3     7
2     1     2    10     8
3     1     3     2     1
4     1     4     8     1
5     1    10     2   118
6     1    10     3    33
7     1    10     4     2
8     1    10     5     1
9     1    10    NA    46

Next, define your nodes (i.e. each combination of step and milestone).

#' Build a node ID by joining two values with a hyphen.
#'
#' @param p Left-hand part of the ID (vectorised).
#' @param n Right-hand part of the ID (vectorised).
#' @return A character vector of the form "p-n".
prefix <- function(p, n) {
  paste(p, n, sep = "-")
}

# Node table for makeRiver(): one row per distinct (step, milestone) pair.
# ID is "<step>-<milestone>", x is the step, and y is the dense rank of the
# milestone value.
nodes <- ddnew %>%
  distinct(step, milestone) %>%
  mutate(
    ID = prefix(step, milestone),
    y = dense_rank(milestone)
  ) %>%
  select(ID, x = step, y)

Then define your edges:

# Edges from step 1 to step 2: total the path counts for every
# (milestone at step 1, milestone at step 2) pair, then turn both endpoints
# into node IDs.
e12 <- paths %>%
  group_by(N1 = `1`, N2 = `2`) %>%
  summarise(Value = sum(n)) %>%
  ungroup() %>%
  mutate(
    N1 = prefix(1, N1),
    N2 = prefix(2, N2)
  )

# Edges from step 2 to step 3, built like the step 1 -> 2 edges. Paths with
# no step-3 milestone (NA) are dropped because they have no target node.
e23 <- paths %>%
  group_by(N1 = `2`, N2 = `3`) %>%
  filter(!is.na(N2)) %>%
  summarise(Value = sum(n)) %>%
  ungroup() %>%
  mutate(
    N1 = prefix(2, N1),
    N2 = prefix(3, N2)
  )

# Stack both edge sets into one table with columns N1, N2 and Value, and
# convert it to a plain data.frame before handing it to makeRiver().
# (The former mutate(Value = Value) step was an identity no-op and is gone.)
edges <- bind_rows(e12, e23) %>%
  as.data.frame()

Finally, make the plot:

# Start from riverplot's default style and set the label rotation to "0"
# so node labels are displayed horizontally.
style <- default.style()
style[["srt"]] <- "0"

# Assemble the riverplot object from the node and edge tables and draw it.
plot(makeRiver(nodes, edges), default_style = style)

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!