How can I create a column that cumulatively adds the sum of two previous rows based on conditions?

ⅰ亾dé卋堺 提交于 2019-12-04 21:00:00
winner <- c(1,2,3,1,2,3,1,2,3)
loser <-  c(3,1,1,2,1,1,3,1,2)
date <- c("2017-10-01","2017-10-02","2017-10-03","2017-10-04","2017-10-05","2017-10-06","2017-10-07","2017-10-08","2017-10-09")
winner_points <- c(2,1,2,1,2,1,2,1,2)
loser_points <- c(1,0,1,0,1,0,1,0,1)
test_data <- data.frame(winner, loser, date = as.Date(date), winner_points, loser_points)


library(dplyr)
library(tidyr)

test_data %>%
  unite(winner, winner, winner_points) %>%                    # unite winner columns
  unite(loser, loser, loser_points) %>%                       # unite loser columns
  gather(type, pl_pts, winner, loser, -date) %>%              # reshape
  separate(pl_pts, c("player","points"), convert = T) %>%     # separate columns
  arrange(date) %>%                                           # order dates (in case it's not)
  group_by(player) %>%                                        # for each player
  mutate(sum_points = cumsum(points) - points) %>%            # get points up to that date
  ungroup() %>%                                               # forget the grouping
  unite(pl_pts_sumpts, player, points, sum_points) %>%        # unite columns
  spread(type, pl_pts_sumpts) %>%                             # reshape
  separate(loser, c("loser", "loser_points", "loser_points_sum"), convert = T) %>%                # separate columns and give appropriate names
  separate(winner, c("winner", "winner_points", "winner_points_sum"), convert = T) %>%
  select(winner, loser, date, winner_points, loser_points, winner_points_sum, loser_points_sum)   # select the order you prefer


# # A tibble: 9 x 7
#   winner loser       date winner_points loser_points winner_points_sum loser_points_sum
# *  <int> <int>     <date>         <int>        <int>             <int>            <int>
# 1      1     3 2017-10-01             2            1                 0                0
# 2      2     1 2017-10-02             1            0                 0                2
# 3      3     1 2017-10-03             2            1                 1                2
# 4      1     2 2017-10-04             1            0                 3                1
# 5      2     1 2017-10-05             2            1                 1                4
# 6      3     1 2017-10-06             1            0                 3                5
# 7      1     3 2017-10-07             2            1                 5                4
# 8      2     1 2017-10-08             1            0                 3                7
# 9      3     2 2017-10-09             2            1                 5                4

I finally understood what you want. And I took an approach of getting cumulative points of each player at each point in time and then joining it to the original test_data data frame.

winner <- c(1,2,3,1,2,3,1,2,3)
loser <-  c(3,1,1,2,1,1,3,1,2)
date <- c("2017-10-01","2017-10-02","2017-10-03","2017-10-04","2017-10-05","2017-10-06","2017-10-07","2017-10-08","2017-10-09")
winner_points <- c(2,1,2,1,2,1,2,1,2)
loser_points <- c(1,0,1,0,1,0,1,0,1)
test_data <- data.frame(winner, loser, date = as.Date(date), winner_points, loser_points)

library(dplyr)
library(tidyr)

cum_points <- test_data %>% 
  gather(end_game_status, player_id, winner, loser) %>% 
  gather(which_point, how_many_points, winner_points, loser_points) %>% 
  filter(
    (end_game_status == "winner" & which_point == "winner_points") | 
      (end_game_status == "loser" & which_point == "loser_points")) %>% 
  arrange(date = as.Date(date)) %>% 
  group_by(player_id) %>% 
  mutate(cumulative_points = cumsum(how_many_points)) %>% 
  mutate(cumulative_points_sofar = lag(cumulative_points, default = 0))
  select(player_id, date, cumulative_points)

output <- test_data %>% 
  left_join(cum_points, by = c('date', 'winner' = 'player_id')) %>% 
  rename(winner_points_sum = cumulative_points_sofar) %>% 
  left_join(cum_points, by = c('date', 'loser' = 'player_id')) %>% 
  rename(loser_points_sum = cumulative_points_sofar)
output

The difference to the previous question of the OP is that the OP is now asking for the cumulative sum of points each player has scored so far, i.e., before the actual date. Furthermore, the sample data set now contains a date column which uniquely identifies each row.

So, my previous approach can be used here as well, with some modifications. The solution below reshapes the data from wide to long format whereby two value variables are reshaped simultaneously, computes the cumulative sums for each player id , and finally reshapes from long back to wide format, again. In order to sum only points scored before the actual date, the rows are lagged by one.

It is important to note that the winner and loser columns contain the respective player ids.

library(data.table)
cols <- c("winner", "loser")
setDT(test_data)[
  # reshape multiple value variables simultaneously from wide to long format
  , melt(.SD, id.vars = "date", 
         measure.vars = list(cols, paste0(cols, "_points")), 
         value.name = c("id", "points"))][
           # rename variable column
           , variable := forcats::lvls_revalue(variable, cols)][
             # order by date and cumulate the lagged points by id
             order(date), points_sum := cumsum(shift(points, fill = 0)), by = id][
               # reshape multiple value variables simultaneously from long to wide format
               , dcast(.SD, date ~ variable, value.var = c("id", "points", "points_sum"))]
         date id_winner id_loser points_winner points_loser points_sum_winner points_sum_loser
1: 2017-10-01         1        3             2            1                 0                0
2: 2017-10-02         2        1             1            0                 0                2
3: 2017-10-03         3        1             2            1                 1                2
4: 2017-10-04         1        2             1            0                 3                1
5: 2017-10-05         2        1             2            1                 1                4
6: 2017-10-06         3        1             1            0                 3                5
7: 2017-10-07         1        3             2            1                 5                4
8: 2017-10-08         2        1             1            0                 3                7
9: 2017-10-09         3        2             2            1                 5                4
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!