问题
I would like to paste strings from two data frames, `n` and `p` (`dput` output at the end).
They have different sizes, `nrow(n) = 25` and `nrow(p) = 20`, and share two factors: `factor1` (binary; named `factor` in the `dput` output below) and `factor2` (integers).
head(n,3) head(p,3)
string factor1 factor2 string factor1 factor2
-- -- -- -- -- --
h f1 5 i f1 1
h f1 6 c f1 2
h f1 7 c f1 3
tail(n,3) tail(p,3)
string factor1 factor2 string factor1 factor2
-- -- -- -- -- --
a f2 27 h f2 18
g f2 28 i f2 19
b f2 29 i f2 20
Here, I would like to create a data frame which
- does not omit any combination of factors,
- pastes the strings of `n` and `p` together when their sets of factors are the same,
- keeps the single string when a set of factors occurs in only one of the two data frames.
# Attempts so far; none of them produces the intended output:
output <- paste (p - n) # error: n and p have different lengths
output <- merge (p,n, all=T) # merges both into one df
output <- tapply(output, 1, paste) # same error
output <- tapply(output[which((output$factor == output$factor & output$factor2 == output$factor2 ))], 1, paste) # nonsensical
Apologies for the lack of "minimal code"...
----
Intended output:
head(output) tail(output)
string factor factor2 string factor factor2
-- -- -- -- -- --
i f1 1 g f2 24
c f1 2 e f1 25
c f1 3 j f1 26
g f1 4 a f2 27
fh f1 5 g f2 28
ih f1 6 b f2 29
-----
> dput(n)
structure(list(string = structure(c(7L, 7L, 7L, 4L, 5L, 2L, 2L,
1L, 4L, 1L, 1L, 2L, 3L, 1L, 4L, 1L, 8L, 8L, 2L, 6L, 5L, 8L, 1L,
6L, 2L), .Label = c("a", "b", "c", "d", "e", "g", "h", "j"), class = "factor"),
factor = c("f1", "f1", "f1", "f1", "f1", "f1", "f1", "f1",
"f1", "f1", "f2", "f2", "f2", "f2", "f2", "f2", "f2", "f2",
"f2", "f2", "f1", "f1", "f2", "f2", "f2"), factor2 = 5:29), .Names = c("string",
"factor", "factor2"), row.names = c(NA, -25L), class = "data.frame")
> dput(p)
structure(list(string = structure(c(5L, 1L, 1L, 3L, 2L, 5L, 5L,
6L, 4L, 6L, 6L, 5L, 4L, 6L, 6L, 6L, 6L, 4L, 5L, 5L), .Label = c("c",
"f", "g", "h", "i", "j"), class = "factor"), factor = c("f1",
"f1", "f1", "f1", "f1", "f1", "f1", "f1", "f1", "f1", "f2", "f2",
"f2", "f2", "f2", "f2", "f2", "f2", "f2", "f2"), factor2 = 1:20), .Names = c("string",
"factor", "factor2"), row.names = c(NA, -20L), class = "data.frame")
回答1:
Using `dplyr` and `purrr`, we can first do a `full_join`, then `paste` a vector of the two strings from which we omit the `NA`s:
library(tidyverse)

# Full-join n and p on both factor columns so that every (factor, factor2)
# combination found in either data frame is kept; the clashing `string`
# columns come back as string.x (from n) and string.y (from p).
full_join(n, p, by = c("factor", "factor2")) %>%
  # Paste p's string before n's, dropping whichever side is NA (i.e. the
  # combination exists in only one data frame). map2_chr() yields a plain
  # character column, whereas map2() would leave `string` as a list-column.
  mutate(string = map2_chr(
    as.character(string.x), as.character(string.y),
    ~ paste0(na.omit(c(.y, .x)), collapse = "")
  )) %>%
  select(-string.x, -string.y)
   factor factor2 string
1      f1       5     fh
2      f1       6     ih
3      f1       7     ih
4      f1       8     jd
5      f1       9     he
6      f1      10     jb
7      f1      11      b
8      f1      12      a
9      f1      13      d
10     f1      14      a
11     f2      15     ja
12     f2      16     jb
13     f2      17     jc
14     f2      18     ha
15     f2      19     id
16     f2      20     ia
17     f2      21      j
18     f2      22      j
19     f2      23      b
20     f2      24      g
21     f1      25      e
22     f1      26      j
23     f2      27      a
24     f2      28      g
25     f2      29      b
26     f1       1      i
27     f1       2      c
28     f1       3      c
29     f1       4      g
30     f2      11      j
31     f2      12      i
32     f2      13      h
33     f2      14      j
In base R:
# Outer-merge n and p on both factor columns; all = TRUE keeps combinations
# that occur in only one data frame. The clashing `string` columns come back
# as string.x (from n) and string.y (from p).
np <- merge(n, p, by = c("factor", "factor2"), all = TRUE)

# Paste p's string before n's. A missing side becomes "" so that rows present
# in only one data frame keep their single value. This is vectorised and
# returns an unnamed character vector (mapply() would name the result by its
# first argument and return a list for zero rows).
from_p <- as.character(np$string.y)
from_n <- as.character(np$string.x)
np$string <- paste0(
  ifelse(is.na(from_p), "", from_p),
  ifelse(is.na(from_n), "", from_n)
)

# Drop the two helper columns by name rather than by position.
np[, setdiff(names(np), c("string.x", "string.y"))]
来源:https://stackoverflow.com/questions/42675184/pasting-two-dataframes-of-different-sizes