pasting two dataframes of different sizes

こ雲淡風輕ζ 提交于 2019-12-12 02:14:48

问题


I would like to paste strings from two data frames, n and p (dput at the end). They have different sizes, nrow(n) = 25 and nrow(p) = 20, and share two factors: factor1 (binary) and factor2 (integers).


head(n,3)                       head(p,3)
string   factor1  factor2        string    factor1  factor2
--       --       --             --        --       --
h        f1       5              i         f1       1
h        f1       6              c         f1       2
h        f1       7              c         f1       3

tail(n,3)                       tail(p,3)
string   factor1  factor2        string    factor1  factor2
--       --       --             --        --       --
a        f2       27             h         f2       18
g        f2       28             i         f2       19
b        f2       29             i         f2       20

Here, I would like to create a dataframe

  1. which does not omit any factors
  2. pastes the strings of n and p together when their set of factors is the same
  3. if a set of factors occurs in only one of the two data frames, keeps that single string

# Attempts so far (none of them gives the intended output):
output <- paste (p - n) # error: n and p have different lengths
output <- merge (p,n, all=T) # merge into one df
output <- tapply(output, 1, paste) # same error
output <- tapply(output[which((output$factor == output$factor & output$factor2 == output$factor2 ))], 1, paste) # nonsensical

Apologies for the lack of "minimal code"...

----

Intended output:

head(output)                   tail(output)
string   factor   factor2        string    factor   factor2
--       --       --             --        --       --
i        f1       1              g         f2       24
c        f1       2              e         f1       25
c        f1       3              j         f1       26
g        f1       4              a         f2       27
fh       f1       5              g         f2       28  
ih       f1       6              b         f2       29  

-----

> dput(n)
    structure(list(string = structure(c(7L, 7L, 7L, 4L, 5L, 2L, 2L, 
1L, 4L, 1L, 1L, 2L, 3L, 1L, 4L, 1L, 8L, 8L, 2L, 6L, 5L, 8L, 1L, 
6L, 2L), .Label = c("a", "b", "c", "d", "e", "g", "h", "j"), class = "factor"), 
    factor = c("f1", "f1", "f1", "f1", "f1", "f1", "f1", "f1", 
    "f1", "f1", "f2", "f2", "f2", "f2", "f2", "f2", "f2", "f2", 
    "f2", "f2", "f1", "f1", "f2", "f2", "f2"), factor2 = 5:29), .Names = c("string", 
"factor", "factor2"), row.names = c(NA, -25L), class = "data.frame")

> dput(p)
     structure(list(string = structure(c(5L, 1L, 1L, 3L, 2L, 5L, 5L, 
6L, 4L, 6L, 6L, 5L, 4L, 6L, 6L, 6L, 6L, 4L, 5L, 5L), .Label = c("c", 
"f", "g", "h", "i", "j"), class = "factor"), factor = c("f1", 
"f1", "f1", "f1", "f1", "f1", "f1", "f1", "f1", "f1", "f2", "f2", 
"f2", "f2", "f2", "f2", "f2", "f2", "f2", "f2"), factor2 = 1:20), .Names = c("string", 
"factor", "factor2"), row.names = c(NA, -20L), class = "data.frame")

回答1:


Using dplyr and purrr we can first do full_join, then paste a vector of the two strings from which we omit the NAs:

library(tidyverse)

# Keep every (factor, factor2) combination found in either data frame.
# After the join, string.x holds n's string and string.y holds p's string,
# with NA where that data frame has no matching row.
full_join(n, p, by = c("factor", "factor2")) %>%
  # map2_chr() yields a plain character column; map2() would create a
  # list-column instead.
  mutate(string = map2_chr(as.character(string.x), as.character(string.y),
                           # p's string goes first, then n's; na.omit()
                           # drops whichever side is missing before pasting.
                           ~ paste0(na.omit(c(.y, .x)), collapse = ""))) %>%
  select(-string.x, -string.y)
   factor factor2 string
1      f1       5     fh
2      f1       6     ih
3      f1       7     ih
4      f1       8     jd
5      f1       9     he
6      f1      10     jb
7      f1      11      b
8      f1      12      a
9      f1      13      d
10     f1      14      a
11     f2      15     ja
12     f2      16     jb
13     f2      17     jc
14     f2      18     ha
15     f2      19     id
16     f2      20     ia
17     f2      21      j
18     f2      22      j
19     f2      23      b
20     f2      24      g
21     f1      25      e
22     f1      26      j
23     f2      27      a
24     f2      28      g
25     f2      29      b
26     f1       1      i
27     f1       2      c
28     f1       3      c
29     f1       4      g
30     f2      11      j
31     f2      12      i
32     f2      13      h
33     f2      14      j

In base R:

# all = TRUE makes this a full outer join, so no (factor, factor2)
# combination from either data frame is dropped. string.x comes from n and
# string.y from p; the side without a matching row is NA.
np <- merge(n, p, by = c("factor", "factor2"), all = TRUE)
# Paste p's string before n's, skipping NAs. USE.NAMES = FALSE stops mapply()
# from naming the result after its first (character) argument.
np$string <- mapply(
  function(p_str, n_str) paste0(na.omit(c(p_str, n_str)), collapse = ""),
  as.character(np$string.y), as.character(np$string.x),
  USE.NAMES = FALSE
)
# Select the result columns by name rather than by position.
np[, c("factor", "factor2", "string")]


来源:https://stackoverflow.com/questions/42675184/pasting-two-dataframes-of-different-sizes

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!