How to concatenate data.frame inside lists by using names?

匆匆过客 提交于 2019-12-23 03:09:17

问题


I have to import over 1,000 excel files, and each excel contains multiple sheets (some have the same sheet name and some have different sheet names).

Consider the following small example:

# First list: games, weather and cars.
games <- data.frame(index = c(1, 2, 3), player = c("John", "Sam", "Mary"))
weather <- data.frame(
  index = c(1, 2, 3),
  temperature = c("hot", "cold", "rainy")
)
cars <- data.frame(index = c(1, 2, 3), car = c("honda", "toyota", "bmw"))
list1 <- list(games = games, weather = weather, cars = cars)

# Second list: new games and weather, plus sport (and no cars).
games <- data.frame(index = c(1, 2, 3), player = c("AA", "BB", "CC"))
weather <- data.frame(
  index = c(1, 2, 3),
  temperature = c("cold", "rainy", "hot")
)
sport <- data.frame(
  index = c(1, 2, 3),
  interest = c("swim", "soccer", "rugby")
)
list2 <- list(games = games, weather = weather, sport = sport)

# Third list: the same games and weather data frames as the second list.
list3 <- list2[c("games", "weather")]

# Remove the temporaries so only list1, list2 and list3 are left behind.
rm(list = c("games", "sport", "weather", "cars"))

I am looking for a way to combine the lists by the names of their elements. I have tried merge() and mapply(), but neither returned what I wanted.

The return that I want is as follows:

   $`games`
# A tibble: 6 x 2
  index player
  <dbl> <chr> 
1     1 John  
2     2 Sam   
3     3 Mary  
4     1 AA    
5     2 BB    
6     3 CC    

$weather
# A tibble: 6 x 2
  index temperature
  <dbl> <chr>      
1     1 hot        
2     2 cold       
3     3 rainy      
4     1 cold       
5     2 rainy      
6     3 hot        

$cars
# A tibble: 3 x 2
  index car   
  <dbl> <chr> 
1     1 honda 
2     2 toyota
3     3 bmw   

$sport
  index interest
1     1     swim
2     2   soccer
3     3    rugby

EDIT: I have also encountered the case where a data.frame (here sport) is present in list2 but not in list1.


回答1:


You can use purrr to help manipulate the list. I add stringsAsFactors = FALSE only so that I can bind the data frames. If you already use tibbles, you won't have this issue.

  • I create a list of the lists.
  • transpose changes the list to regroup the elements by name. Basically, x[[1]][[2]] is equivalent to transpose(x)[[2]][[1]]
  • I use map to iterate through the list, and dplyr::bind_rows to get the resulting tibble.
# Build two named lists of data frames. String columns are kept as character
# through an explicit stringsAsFactors = FALSE on each call instead of a
# session-wide options() change, so nothing outside this example is affected.
games <- data.frame(
  index = c(1, 2, 3),
  player = c("John", "Sam", "Mary"),
  stringsAsFactors = FALSE
)
weather <- data.frame(
  index = c(1, 2, 3),
  temperature = c("hot", "cold", "rainy"),
  stringsAsFactors = FALSE
)
cars <- data.frame(
  index = c(1, 2, 3),
  car = c("honda", "toyota", "bmw"),
  stringsAsFactors = FALSE
)
list1 <- list(games, weather, cars)
names(list1) <- c("games", "weather", "cars")

games <- data.frame(
  index = c(1, 2, 3),
  player = c("AA", "BB", "CC"),
  stringsAsFactors = FALSE
)
weather <- data.frame(
  index = c(1, 2, 3),
  temperature = c("cold", "rainy", "hot"),
  stringsAsFactors = FALSE
)
list2 <- list(games, weather)
names(list2) <- c("games", "weather")

library(purrr)
list(list1, list2) %>%
  # regroup named element together
  transpose() %>%
  # bind the df together
  map(dplyr::bind_rows)
#> $games
#>   index player
#> 1     1   John
#> 2     2    Sam
#> 3     3   Mary
#> 4     1     AA
#> 5     2     BB
#> 6     3     CC
#> 
#> $weather
#>   index temperature
#> 1     1         hot
#> 2     2        cold
#> 3     3       rainy
#> 4     1        cold
#> 5     2       rainy
#> 6     3         hot
#> 
#> $cars
#>   index    car
#> 1     1  honda
#> 2     2 toyota
#> 3     3    bmw

Created on 2018-11-04 by the reprex package (v0.2.1)

If the first list does not contain all the elements you want, you need to provide the .names argument in transpose. See help("transpose", package = "purrr"). I build an example for that.

# Same idea as before, but here the first list lacks an element ("cars") that
# the second one has. String columns are kept as character through an explicit
# stringsAsFactors = FALSE on each call rather than a session-wide options()
# change.
games <- data.frame(
  index = c(1, 2, 3),
  player = c("John", "Sam", "Mary"),
  stringsAsFactors = FALSE
)
weather <- data.frame(
  index = c(1, 2, 3),
  temperature = c("hot", "cold", "rainy"),
  stringsAsFactors = FALSE
)
list1 <- list(games = games, weather = weather)

games <- data.frame(
  index = c(1, 2, 3),
  player = c("AA", "BB", "CC"),
  stringsAsFactors = FALSE
)
weather <- data.frame(
  index = c(1, 2, 3),
  temperature = c("cold", "rainy", "hot"),
  stringsAsFactors = FALSE
)
cars <- data.frame(
  index = c(1, 2, 3),
  car = c("honda", "toyota", "bmw"),
  stringsAsFactors = FALSE
)
list2 <- list(games = games, weather = weather, cars = cars)

library(purrr)
all_list <- list(list1, list2)
# Union of the element names over every list, passed to transpose() so that
# elements missing from the first list are still regrouped.
all_names <- all_list %>% map(names) %>% reduce(union)
# Reuse all_list so the names and the data come from the same object.
all_list %>%
  # regroup named element together
  transpose(.names = all_names) %>%
  # bind the df together
  map(dplyr::bind_rows)
#> $games
#>   index player
#> 1     1   John
#> 2     2    Sam
#> 3     3   Mary
#> 4     1     AA
#> 5     2     BB
#> 6     3     CC
#> 
#> $weather
#>   index temperature
#> 1     1         hot
#> 2     2        cold
#> 3     3       rainy
#> 4     1        cold
#> 5     2       rainy
#> 6     3         hot
#> 
#> $cars
#>   index    car
#> 1     1  honda
#> 2     2 toyota
#> 3     3    bmw

Created on 2018-11-04 by the reprex package (v0.2.1)




回答2:


There's an easy way with lapply().

# For every distinct element name found in the objects whose name contains
# "list", stack the same-named data frames from list1, list2 and list3 and
# drop duplicated rows. The result is an unnamed list.
lapply(
  unique(unlist(lapply(mget(ls(pattern = "list")), names))),
  function(sheet) {
    stacked <- rbind(list1[[sheet]], list2[[sheet]], list3[[sheet]])
    unique(stacked)
  }
)

Use setNames() and dplyr::as_tibble to get list names and tibbles.

Like so:

# Distinct element names across the three lists, in first-seen order. They are
# taken from list1, list2 and list3 directly, so this snippet does not depend
# on an object that is only created further down.
nms <- unique(unlist(lapply(list(list1, list2, list3), names)))

# Stack the same-named data frames, drop duplicated rows, convert each result
# to a tibble and label the results with the element names.
setNames(
  lapply(
    lapply(nms, function(x) unique(rbind(list1[[x]], list2[[x]], list3[[x]]))),
    dplyr::as_tibble
  ),
  nms
)

Yields

$`games`
# A tibble: 6 x 2
  index player
* <dbl> <fct> 
1     1 John  
2     2 Sam   
3     3 Mary  
4     1 AA    
5     2 BB    
6     3 CC    

$weather
# A tibble: 6 x 2
  index temperature
* <dbl> <fct>      
1     1 hot        
2     2 cold       
3     3 rainy      
4     1 cold       
5     2 rainy      
6     3 hot        

$cars
# A tibble: 3 x 2
  index car   
* <dbl> <fct> 
1     1 honda 
2     2 toyota
3     3 bmw   

$sport
# A tibble: 3 x 2
  index interest
* <dbl> <fct>   
1     1 swim    
2     2 soccer  
3     3 rugby  

However, if the number of lists is unknown, and supposing all your lists are in the global environment with names matching the pattern "list", you could use the following approach.

# Gather every object whose name begins with "list" into one list of lists.
Lol <- mget(ls(pattern = "^list"))

# Combine a list of named lists of data frames into one data frame per name.
#
# z: a list whose elements are named lists of data frames.
#
# Returns a list named by the distinct sublist names. Each element holds the
# full-outer-merged rows of the data frames that carried that name, with
# all-NA columns removed and row names reset.
mergeFun <- function(z) {
  # Tag every data frame with the name it carries in its sublist.
  # seq_along() keeps an empty sublist from being indexed at position 1.
  l1 <- lapply(z,
               function(y) lapply(seq_along(y),  # new column w/ sublist names
                                  function(x) cbind(y[[x]], list = names(y)[x])))
  l2 <- unlist(l1, recursive = FALSE)  # unnest lists
  l3 <- Reduce(function(...) merge(..., all = TRUE), l2)  # merge list
  l4 <- split(l3, l3$list)  # new list of lists by sublist names
  l5 <- lapply(l4, function(w) {
    # Drop the helper column by name: where it ends up after merging depends
    # on which columns the merged frames had in common, so it is not always
    # column 2. drop = FALSE keeps a data frame even if one column remains.
    w <- w[, names(w) != "list", drop = FALSE]
    Filter(function(v) !all(is.na(v)), w)  # delete NA cols
  })
  lapply(l5, function(u) `rownames<-`(u, NULL))  # reset row names
}

Do lapply(mergeFun(Lol), dplyr::as_tibble) to obtain tibbles if desired, otherwise just mergeFun(Lol).

Yields

> lapply(mergeFun(Lol), dplyr::as_tibble)
$`games`
# A tibble: 6 x 2
  index player
  <dbl> <fct> 
1     1 John  
2     1 AA    
3     2 Sam   
4     2 BB    
5     3 Mary  
6     3 CC    

$weather
# A tibble: 6 x 2
  index temperature
  <dbl> <fct>      
1     1 cold       
2     1 hot        
3     2 cold       
4     2 rainy      
5     3 hot        
6     3 rainy      

$cars
# A tibble: 3 x 2
  index car   
  <dbl> <fct> 
1     1 honda 
2     2 toyota
3     3 bmw   

$sport
# A tibble: 3 x 2
  index interest
  <dbl> <fct>   
1     1 swim    
2     2 soccer  
3     3 rugby   

Data

# Example data: three named lists of 3-row data frames. Every string column is
# a factor whose levels are listed explicitly.
list1 <- list(
  games = data.frame(
    index = c(1, 2, 3),
    player = factor(
      c("John", "Sam", "Mary"),
      levels = c("John", "Mary", "Sam")
    )
  ),
  weather = data.frame(
    index = c(1, 2, 3),
    temperature = factor(
      c("hot", "cold", "rainy"),
      levels = c("cold", "hot", "rainy")
    )
  ),
  cars = data.frame(
    index = c(1, 2, 3),
    car = factor(
      c("honda", "toyota", "bmw"),
      levels = c("bmw", "honda", "toyota")
    )
  )
)
list2 <- list(
  games = data.frame(
    index = c(1, 2, 3),
    player = factor(c("AA", "BB", "CC"), levels = c("AA", "BB", "CC"))
  ),
  weather = data.frame(
    index = c(1, 2, 3),
    temperature = factor(
      c("cold", "rainy", "hot"),
      levels = c("cold", "hot", "rainy")
    )
  ),
  sport = data.frame(
    index = c(1, 2, 3),
    interest = factor(
      c("swim", "soccer", "rugby"),
      levels = c("rugby", "soccer", "swim")
    )
  )
)
# list3 holds the same games and weather data frames as list2.
list3 <- list2[c("games", "weather")]


来源:https://stackoverflow.com/questions/53138173/how-to-concatenate-data-frame-inside-lists-by-using-names

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!