How to put two value columns in the spread() function in R [duplicate]

橙三吉。 提交于 2019-12-01 04:41:20

Try:

library(dplyr)
library(tidyr)

# Reshape `df` from long to wide: one row per (id1, id2, info) combination,
# with one action_comment / action_time column pair per action.
# Note: gather()/spread() are superseded by pivot_longer()/pivot_wider() in
# tidyr >= 1.0.0; they are kept here because the question is about spread().
df %>%
  # Number the actions within each output row. Grouping on all three id
  # columns makes the counter restart at 1 for every combination, not only
  # for every id1.
  group_by(id1, id2, info) %>%
  mutate(id = row_number()) %>%
  # The grouping is only needed for the counter; drop it so the result is
  # not carried around as a grouped data frame.
  ungroup() %>%
  # Stack the two value columns into key/value pairs, keeping the id columns
  # and the counter as identifiers.
  gather(key, value, -(id1:info), -id) %>%
  # Build the wide column names, e.g. "1_action_time".
  unite(id_key, id, key) %>%
  spread(id_key, value)

Which gives:

#Source: local data frame [2 x 9]

#  id1 id2  info 1_action_comment 1_action_time 2_action_comment 2_action_time 3_action_comment 3_action_time
#1   1   a info1         comment1         time1         comment2         time2         comment3         time3
#2   2   b info2         comment4         time4         comment5         time5               NA            NA

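We could do this with the devel version of data.table, whose dcast can take multiple value.var columns. Instructions for installing the devel version are on the data.table project's Installation wiki page (the original hyperlink was lost in this copy).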

We convert the 'data.frame' to 'data.table' (setDT(df)), create a sequence variable ('ind') using the grouping variables ('id1', 'id2', 'info'), and dcast from 'long' to 'wide' format by specifying the value.var as 'action_time' and 'action_comment'.

library(data.table)#v1.9.5+
# Convert `df` to a data.table by reference (this modifies `df` itself).
setDT(df)
# Add a running index 'ind' within each (id1, id2, info) group.
df[, ind := seq_len(.N), by = .(id1, id2, info)]
# Cast long -> wide, spreading both value columns over 'ind'; cells with no
# matching row are filled with an empty string.
dcast(
  df,
  id1 + id2 + info ~ ind,
  value.var = c("action_time", "action_comment"),
  fill = ""
)
 #    id1 id2  info 1_action_time 2_action_time 3_action_time 1_action_comment
 #1:   1   a info1         time1         time2         time3         comment1
 #2:   2   b info2         time4         time5                       comment4
 #   2_action_comment 3_action_comment
 #1:         comment2         comment3
 #2:         comment5    

Or use reshape from base R. We create the sequence variable ('ind') with ave and reshape to change from 'long' to 'wide' format.

# Running index of each row within its (id1, id2, info) group.
df$ind <- ave(seq_len(nrow(df)), df$id1, df$id2, df$info, FUN = seq_along)
# Long -> wide: one row per id combination, one set of value columns per
# 'ind' (suffixed ".1", ".2", ...).
reshape(
  df,
  direction = "wide",
  idvar = c("id1", "id2", "info"),
  timevar = "ind"
)
#  id1 id2  info action_time.1 action_comment.1 action_time.2 action_comment.2
#1   1   a info1         time1         comment1         time2         comment2
#4   2   b info2         time4         comment4         time5         comment5
#  action_time.3 action_comment.3
#1         time3         comment3
#4          <NA>             <NA>

data

# Example input: five actions in long format, three for id1 == 1 and two for
# id1 == 2. All columns except id1 (integer) are character.
df <- data.frame(
  id1 = rep(1:2, times = c(3L, 2L)),
  id2 = rep(c("a", "b"), times = c(3L, 2L)),
  info = rep(c("info1", "info2"), times = c(3L, 2L)),
  action_time = paste0("time", 1:5),
  action_comment = paste0("comment", 1:5),
  stringsAsFactors = FALSE  # keep strings as character on R < 4.0 as well
)

Not a straightforward solution, but it works:

library(tidyr)
# Spread each value column once, using the other value column as the key.
# This yields one row per original row, with NA everywhere except in the
# column named after that row's key value.
wide_time <- spread(df, action_comment, action_time)
wide_comment <- spread(df, action_time, action_comment)

# Row helper: drop the NAs and pad the end with NA back to the original
# length, i.e. shift the non-missing values to the left.
shift_left <- function(row_values) {
  target_len <- length(row_values)
  kept <- na.omit(row_values)
  length(kept) <- target_len
  kept
}

# apply() works on as.matrix() of the data frame, a character matrix here,
# so every column (including id1) comes back as character.
wide_time[] <- t(apply(wide_time, 1, shift_left))
wide_comment[] <- t(apply(wide_comment, 1, shift_left))

out <- merge(wide_time, wide_comment, by = c("id1", "id2", "info"))
# Keep only the columns that are not entirely NA. After the shift the column
# names no longer describe their contents: the "commentN" columns hold the
# times and the "timeN" columns hold the comments.
has_values <- colSums(is.na(out)) != nrow(out)
out[, has_values]

#  id1 id2  info comment1 comment2 comment3    time1    time2    time3
#1   1   a info1    time1    time2    time3 comment1 comment2 comment3
#2   2   b info2    time4    time5     <NA> comment4 comment5     <NA>
标签
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!