问题
This question is a follow-up to an earlier post of mine and builds on the answer I received there.
Data
# Sample sales records: six rows, one per Date / Market / Salesman / Product
# combination, with the quantity sold, the unit price, and the unit cost.
df1 <- structure(
  list(
    Date = rep(c("6/24/2020", "6/25/2020"), times = c(4L, 2L)),
    Market = rep("A", times = 6L),
    Salesman = c("MF", "RP", "RP", "FR", "MF", "MF"),
    Product = c("Apple", "Apple", "Banana", "Orange", "Apple", "Banana"),
    Quantity = c(20L, 15L, 20L, 20L, 10L, 15L),
    Price = c(1L, 1L, 2L, 3L, 1L, 1L),
    Cost = rep(c(0.5, 0.6), times = c(4L, 2L))
  ),
  class = "data.frame",
  # Row names are kept as the character labels "1" through "6".
  row.names = as.character(1:6)
)
Solution
library(dplyr) # 1.0.0
library(tidyr)
# Summarise revenue, total cost, and units sold per product for each
# Date/Market combination, returning one row per combination with one
# column per product.
df1 %>%
  group_by(Date, Market) %>%
  # Current dplyr evaluates computed grouping expressions in group_by() on
  # the ungrouped data, so the per-Date/Market totals are computed in an
  # explicit grouped mutate() step. The 1x1 matrix product is flattened with
  # c() and recycled across the rows of each group.
  mutate(
    Revenue = c(Quantity %*% Price),
    TotalCost = c(Quantity %*% Cost)
  ) %>%
  group_by(Revenue, TotalCost, Product, .add = TRUE) %>%
  # Drop only the Product grouping level, so the result stays grouped by
  # Date, Market, Revenue, TotalCost.
  summarise(Sold = sum(Quantity), .groups = "drop_last") %>%
  pivot_wider(names_from = Product, values_from = Sold)
# A tibble: 2 x 7
# Groups: Date, Market, Revenue, TotalCost [2]
# Date Market Revenue TotalCost Apple Banana Orange
# <chr> <chr> <dbl> <dbl> <int> <int> <int>
#1 6/24/2020 A 135 37.5 35 20 20
#2 6/25/2020 A 25 15 10 15 NA
@akrun's solution works well. Now I'd like to know how to add three more columns for quantity sold by salesmen to the existing results so the final output will look like this:
Date Market Revenue Total Cost Apples Sold Bananas Sold Oranges Sold MF RP FR
6/24/2020 A 135 37.5 35 20 20 20 35 20
6/25/2020 A 25 15 10 15 NA 25 NA NA
回答1:
One option is to do the two group-by operations separately, since they summarise different columns ('Product' and 'Salesman'), and then join the two results on their common columns, i.e. 'Date' and 'Market'.
library(dplyr)
library(tidyr)
# Revenue, total cost, and units sold per product for each Date/Market,
# with one column per product.
out1 <- df1 %>%
  group_by(Date, Market) %>%
  # Current dplyr evaluates computed grouping expressions in group_by() on
  # the ungrouped data, so the per-Date/Market totals are computed in an
  # explicit grouped mutate() step. The 1x1 matrix product is flattened with
  # c() and recycled across the rows of each group.
  mutate(
    Revenue = c(Quantity %*% Price),
    TotalCost = c(Quantity %*% Cost)
  ) %>%
  group_by(Revenue, TotalCost, Product, .add = TRUE) %>%
  # Drop only the Product grouping level.
  summarise(Sold = sum(Quantity), .groups = "drop_last") %>%
  pivot_wider(names_from = Product, values_from = Sold)

# Units sold per salesman for each Date/Market, with one column per salesman.
out2 <- df1 %>%
  group_by(Date, Market, Salesman) %>%
  # Drop only the Salesman grouping level.
  summarise(SalesSold = sum(Quantity), .groups = "drop_last") %>%
  pivot_wider(names_from = Salesman, values_from = SalesSold)

# Join on explicit keys rather than relying on the implicit natural join
# over every shared column name.
left_join(out1, out2, by = c("Date", "Market"))
# A tibble: 2 x 10
# Groups: Date, Market, Revenue, TotalCost [2]
# Date Market Revenue TotalCost Apple Banana Orange FR MF RP
# <chr> <chr> <dbl> <dbl> <int> <int> <int> <int> <int> <int>
#1 6/24/2020 A 135 37.5 35 20 20 20 20 35
#2 6/25/2020 A 25 15 10 15 NA NA 25 NA
来源:https://stackoverflow.com/questions/62603956/how-to-add-additional-columns-using-tidyr-group-by-function-in-r