问题
Case1: Input
ST_DATE ND_DATE LO_NO ACTV_CODE ACTV_AMT AB_NO FEATURE_CODE L_NU
7/27/16 7/27/16 265 O 15 1 INTEREST 855
7/27/16 7/27/16 265 O 14 1 INTEREST 855
Expected output
ST_DATE ND_DATE LO_NO ACTV_CODE ACTV_AMT AB_NO FEATURE_INTEREST L_NU
7/27/16 7/27/16 265 O 29 1 2 855
Case 2: Input (my code works for case 2 but throws an error for case 1)
ST_DATE ND_DATE LO_NO ACTV_CODE ACTV_AMT AB_NO FEATURE_CODE L_NU
7/27/16 7/27/16 265 O 15 1 INTEREST 855
7/27/16 7/27/16 265 O 14 1 INSTALLMENT 855
Expected output
ST_DATE ND_DATE LO_NO ACTV_CODE ACTV_AMT AB_NO INTEREST INSTALLMENT L_NU
7/27/16 7/27/16 265 O 29 1 1 1 855
# Attempt to collapse install_BAN to one row per (AB_NO, LO_NO, L_NU) group,
# with one column per FEATURE_CODE value. This is the version that raises the
# "Duplicate identifiers" error on case 1.
install_cntdup_less1 <- install_BAN %>%
group_by(AB_NO,LO_NO,L_NU)%>%
# ACTV_AMT becomes the group total on every row. ftr is the sum of the
# distinct TRUE/FALSE values of !is.na(FEATURE_CODE), so it is 1 whenever the
# group has at least one non-NA code; it is not a per-code row count.
mutate(ACTV_AMT = sum(ACTV_AMT),ftr=sum(unique(!is.na(FEATURE_CODE))))%>%
# spread() needs the remaining columns to identify each row uniquely within a
# FEATURE_CODE value. In case 1 both rows have the same FEATURE_CODE and agree
# on every other column after the mutate above, hence the error.
spread(FEATURE_CODE,ftr,fill = 0)%>%
# Keep the row with the earliest ST_DATE in each group, then apply the
# latest-ND_DATE pick to what is left.
# NOTE(review): the sample rows show dates like 7/27/16, which do not match
# '%Y/%m/%d' -- confirm the real column format.
slice(which.min(as.Date(ST_DATE, '%Y/%m/%d')))%>%
slice(which.max(as.Date(ND_DATE, '%Y/%m/%d')))
I am getting the error below:
Error: Duplicate identifiers for rows (29424, 29425, 29426), (7415, 7416), (30120, 30121)
I tried introducing a unique ID, as suggested in the linked question "Spread with duplicate identifiers (using tidyverse and %>%)", but it messes up my output:
mutate(ind = row_number()) %>%
I am unable to understand what to do. Can anyone help me resolve the error? It may look like a duplicate question, but it is not.
回答1:
This will help:
library(dplyr)
library(tidyr)

# Example data: two FEATURE_CODE values and two ACTV_CODE values, all in the
# same (AB_NO, LO_NO, L_NU) group.
dt <- read.table(text = "
ST_DATE ND_DATE LO_NO ACTV_CODE ACTV_AMT AB_NO FEATURE_CODE L_NU
7/27/16 7/27/16 265 O 15 1 INTEREST 855
7/27/16 7/27/16 265 OO 14 1 INTEREST 855
7/27/16 7/27/16 265 O 15 1 OTHER 855
7/27/16 7/27/16 265 OO 14 1 OTHER 855
", header = TRUE, stringsAsFactors = FALSE)

# Collapse each (AB_NO, LO_NO, L_NU) group to a single row holding the total
# amount, the earliest start date, the latest end date, and one count column
# per FEATURE_CODE / ACTV_CODE value.
dt %>%
  group_by(AB_NO, LO_NO, L_NU) %>%
  mutate(
    ACTV_AMT = sum(ACTV_AMT),
    # The date columns are character ("m/d/yy"), so min()/max() on them would
    # compare text ("10/1/16" sorts before "7/27/16"). Compare the parsed
    # dates instead and keep the original string of the winning row.
    # Assumes each group has at least one parseable date.
    ST_DATE = ST_DATE[which.min(as.Date(ST_DATE, format = "%m/%d/%y"))],
    ND_DATE = ND_DATE[which.max(as.Date(ND_DATE, format = "%m/%d/%y"))]
  ) %>%
  ungroup() %>%
  mutate(
    # A per-row id gives spread() a unique identifier, which avoids the
    # "Duplicate identifiers" error when a code repeats within a group.
    id = row_number(),
    # Prefix the codes so the spread columns are recognisable by origin.
    FEATURE_CODE = paste0("FEATURE_", FEATURE_CODE),
    ACTV_CODE = paste0("ACTV_", ACTV_CODE),
    # Each row contributes 1 to the column of its own code.
    count_FEATURE = 1,
    count_ACTV = 1
  ) %>%
  spread(FEATURE_CODE, count_FEATURE) %>%
  spread(ACTV_CODE, count_ACTV) %>%
  # The id has done its job; drop it so it is not summed below.
  select(-id) %>%
  group_by(ST_DATE, ND_DATE, LO_NO, ACTV_AMT, AB_NO, L_NU) %>%
  # Cells left NA by spread() mean "no such code on this row"; summing with
  # na.rm = TRUE turns the 1s into per-code counts.
  summarise_all(sum, na.rm = TRUE) %>%
  ungroup()
# # A tibble: 1 x 10
# ST_DATE ND_DATE LO_NO ACTV_AMT AB_NO L_NU FEATURE_INTEREST FEATURE_OTHER ACTV_O ACTV_OO
# <chr> <chr> <int> <int> <int> <int> <dbl> <dbl> <dbl> <dbl>
# 1 7/27/16 7/27/16 265 58 1 855 2 2 2 2
来源:https://stackoverflow.com/questions/47118516/take-unique-count-and-sum-each-unique-values-in-r