Convert list of lists to dataframe

匿名 (未验证) 提交于 2019-12-03 08:54:24

问题:

I have a nested list named mylist, which has length 4.

Each element of this list is an experiment: exp1.1, exp1.2, exp2.1 and exp2.2.

Each experiment contains observations of length (in days) of four plant growth stages: EM-V6 V6-R0 R0-R4 and R4-R9.

Each growth stage is organized as a data frame with year and mean.

Here is the complete data:

mylist=structure(list(exp1.1 = structure(list(`EM-V6` = structure(list(     year = 2011:2100, mean = c(34, 34, 32, 28, 25, 32, 32, 28,      27, 30, 32, 31, 33, 28, 26, 31, 33, 27, 34, 26, 28, 27, 27,      30, 29, 31, 34, 30, 26, 31, 33, 33, 27, 30, 28, 32, 31, 29,      32, 31, 25, 28, 28, 26, 32, 29, 26, 31, 28, 29, 30, 25, 27,      32, 27, 28, 28, 30, 24, 30, 29, 29, 29, 28, 26, 28, 26, 26,      28, 31, 30, 27, 26, 28, 25, 24, 24, 30, 27, 26, 26, 27, 26,      26, 24, 26, 28, 25, 30, 26)), .Names = c("year", "mean"), row.names = c(NA,  -90L), class = "data.frame"), `V6-R0` = structure(list(year = 2011:2100,      mean = c(30, 33, 33, 32, 29, 30, 32, 31, 32, 30, 33, 30,      32, 33, 33, 32, 29, 31, 32, 28, 31, 29, 36, 29, 30, 30, 33,      31, 33, 30, 34, 32, 29, 31, 28, 30, 30, 29, 34, 31, 32, 31,      30, 28, 32, 29, 29, 32, 29, 28, 29, 29, 32, 31, 27, 32, 29,      31, 29, 29, 30, 29, 29, 29, 28, 28, 30, 30, 30, 32, 29, 29,      30, 29, 29, 29, 28, 28, 29, 30, 29, 29, 29, 30, 28, 30, 30,      29, 29, 29)), .Names = c("year", "mean"), row.names = c(NA,  -90L), class = "data.frame"), `R0-R4` = structure(list(year = 2011:2100,      mean = c(31, 32, 32, 33, 32, 32, 33, 31, 34, 32, 33, 33,      32, 31, 33, 31, 32, 32, 32, 30, 32, 31, 34, 30, 31, 32, 34,      33, 34, 32, 36, 33, 32, 32, 31, 30, 32, 32, 32, 32, 32, 32,      31, 30, 30, 31, 32, 32, 30, 30, 32, 31, 31, 32, 30, 32, 29,      32, 31, 30, 32, 30, 30, 31, 32, 30, 31, 30, 31, 32, 31, 31,      30, 30, 30, 31, 30, 30, 31, 30, 31, 30, 30, 30, 31, 32, 30,      31, 30, 30)), .Names = c("year", "mean"), row.names = c(NA,  -90L), class = "data.frame"), `R4-R9` = structure(list(year = 2011:2100,      mean = c(27, 29, 28, 28, 27, 30, 29, 27, 30, 26, 30, 28,      29, 28, 29, 27, 29, 28, 25, 26, 26, 25, 27, 27, 27, 28, 30,      28, 29, 27, 29, 28, 29, 28, 26, 26, 28, 28, 30, 28, 27, 25,      26, 25, 25, 26, 26, 27, 25, 25, 26, 25, 27, 28, 24, 27, 25,      28, 26, 24, 27, 26, 27, 25, 26, 26, 24, 26, 25, 26, 24, 25,      25, 
26, 26, 25, 25, 25, 25, 25, 26, 25, 25, 25, 25, 26, 26,      26, 25, 24)), .Names = c("year", "mean"), row.names = c(NA,  -90L), class = "data.frame")), .Names = c("EM-V6", "V6-R0", "R0-R4",  "R4-R9")), exp1.2 = structure(list(`EM-V6` = structure(list(year = 2011:2100,      mean = c(34, 34, 32, 28, 25, 32, 32, 28, 27, 30, 32, 31,      33, 28, 26, 31, 33, 27, 34, 26, 28, 27, 27, 30, 29, 31, 34,      30, 26, 31, 33, 33, 27, 30, 28, 32, 31, 29, 32, 31, 25, 28,      28, 26, 32, 29, 26, 31, 28, 29, 30, 25, 27, 32, 27, 28, 28,      30, 24, 30, 29, 29, 29, 28, 26, 28, 26, 26, 28, 31, 30, 27,      26, 28, 25, 24, 24, 30, 27, 26, 26, 27, 26, 26, 24, 26, 28,      25, 30, 26)), .Names = c("year", "mean"), row.names = c(NA,  -90L), class = "data.frame"), `V6-R0` = structure(list(year = 2011:2100,      mean = c(30, 33, 33, 32, 29, 30, 32, 31, 32, 30, 33, 30,      32, 33, 33, 32, 29, 31, 32, 28, 31, 29, 36, 29, 30, 30, 33,      31, 33, 30, 34, 32, 29, 31, 28, 30, 30, 29, 34, 31, 32, 31,      30, 28, 32, 29, 29, 32, 29, 28, 29, 29, 32, 31, 27, 32, 29,      31, 29, 29, 30, 29, 29, 29, 28, 28, 30, 30, 30, 32, 29, 29,      30, 29, 29, 29, 28, 28, 29, 30, 29, 29, 29, 30, 28, 30, 30,      29, 29, 29)), .Names = c("year", "mean"), row.names = c(NA,  -90L), class = "data.frame"), `R0-R4` = structure(list(year = 2011:2100,      mean = c(31, 32, 32, 33, 32, 32, 33, 31, 34, 32, 33, 33,      32, 31, 33, 31, 32, 32, 32, 30, 32, 31, 34, 30, 31, 32, 34,      33, 34, 32, 36, 33, 32, 32, 31, 30, 32, 32, 32, 32, 32, 32,      31, 30, 30, 31, 32, 32, 30, 30, 32, 31, 31, 32, 30, 32, 29,      32, 31, 30, 32, 30, 30, 31, 32, 30, 31, 30, 31, 32, 31, 31,      30, 30, 30, 31, 30, 30, 31, 30, 31, 30, 30, 30, 31, 32, 30,      31, 30, 30)), .Names = c("year", "mean"), row.names = c(NA,  -90L), class = "data.frame"), `R4-R9` = structure(list(year = 2011:2100,      mean = c(27, 29, 28, 28, 27, 30, 29, 27, 30, 26, 30, 28,      29, 28, 29, 27, 29, 28, 25, 26, 26, 25, 27, 27, 27, 28, 30,      28, 29, 27, 29, 28, 
29, 28, 26, 26, 28, 28, 30, 28, 27, 25,      26, 25, 25, 26, 26, 27, 25, 25, 26, 25, 27, 28, 24, 27, 25,      28, 26, 24, 27, 26, 27, 25, 26, 26, 24, 26, 25, 26, 24, 25,      25, 26, 26, 25, 25, 25, 25, 25, 26, 25, 25, 25, 25, 26, 26,      26, 25, 24)), .Names = c("year", "mean"), row.names = c(NA,  -90L), class = "data.frame")), .Names = c("EM-V6", "V6-R0", "R0-R4",  "R4-R9")), exp2.1 = structure(list(`EM-V6` = structure(list(year = 2011:2100,      mean = c(34, 34, 32, 28, 25, 32, 32, 28, 27, 30, 32, 31,      33, 28, 26, 31, 33, 27, 34, 26, 28, 27, 27, 30, 29, 31, 34,      30, 26, 31, 33, 33, 27, 30, 28, 32, 31, 29, 32, 31, 25, 28,      28, 26, 32, 29, 26, 31, 28, 29, 30, 25, 27, 32, 27, 28, 28,      30, 24, 30, 29, 29, 29, 28, 26, 28, 26, 26, 28, 31, 30, 27,      26, 28, 25, 24, 24, 30, 27, 26, 26, 27, 26, 26, 24, 26, 28,      25, 30, 26)), .Names = c("year", "mean"), row.names = c(NA,  -90L), class = "data.frame"), `V6-R0` = structure(list(year = 2011:2100,      mean = c(30, 33, 33, 32, 29, 30, 32, 31, 32, 30, 33, 30,      32, 33, 33, 32, 29, 31, 32, 28, 31, 29, 36, 29, 30, 30, 33,      31, 33, 30, 34, 32, 29, 31, 28, 30, 30, 29, 34, 31, 32, 31,      30, 28, 32, 29, 29, 32, 29, 28, 29, 29, 32, 31, 27, 32, 29,      31, 29, 29, 30, 29, 29, 29, 28, 28, 30, 30, 30, 32, 29, 29,      30, 29, 29, 29, 28, 28, 29, 30, 29, 29, 29, 30, 28, 30, 30,      29, 29, 29)), .Names = c("year", "mean"), row.names = c(NA,  -90L), class = "data.frame"), `R0-R4` = structure(list(year = 2011:2100,      mean = c(31, 32, 32, 33, 32, 32, 33, 31, 34, 32, 33, 33,      32, 31, 33, 31, 32, 32, 32, 30, 32, 31, 34, 30, 31, 32, 34,      33, 34, 32, 36, 33, 32, 32, 31, 30, 32, 32, 32, 32, 32, 32,      31, 30, 30, 31, 32, 32, 30, 30, 32, 31, 31, 32, 30, 32, 29,      32, 31, 30, 32, 30, 30, 31, 32, 30, 31, 30, 31, 32, 31, 31,      30, 30, 30, 31, 30, 30, 31, 30, 31, 30, 30, 30, 31, 32, 30,      31, 30, 30)), .Names = c("year", "mean"), row.names = c(NA,  -90L), class = "data.frame"), `R4-R9` = 
structure(list(year = 2011:2100,      mean = c(27, 29, 28, 28, 27, 30, 29, 27, 30, 26, 30, 28,      29, 28, 29, 27, 29, 28, 25, 26, 26, 25, 27, 27, 27, 28, 30,      28, 29, 27, 29, 28, 29, 28, 26, 26, 28, 28, 30, 28, 27, 25,      26, 25, 25, 26, 26, 27, 25, 25, 26, 25, 27, 28, 24, 27, 25,      28, 26, 24, 27, 26, 27, 25, 26, 26, 24, 26, 25, 26, 24, 25,      25, 26, 26, 25, 25, 25, 25, 25, 26, 25, 25, 25, 25, 26, 26,      26, 25, 24)), .Names = c("year", "mean"), row.names = c(NA,  -90L), class = "data.frame")), .Names = c("EM-V6", "V6-R0", "R0-R4",  "R4-R9")), exp2.2 = structure(list(`EM-V6` = structure(list(year = 2011:2100,      mean = c(34, 34, 32, 28, 25, 32, 32, 28, 27, 30, 32, 31,      33, 28, 26, 31, 33, 27, 34, 26, 28, 27, 27, 30, 29, 31, 34,      30, 26, 31, 33, 33, 27, 30, 28, 32, 31, 29, 32, 31, 25, 28,      28, 26, 32, 29, 26, 31, 28, 29, 30, 25, 27, 32, 27, 28, 28,      30, 24, 30, 29, 29, 29, 28, 26, 28, 26, 26, 28, 31, 30, 27,      26, 28, 25, 24, 24, 30, 27, 26, 26, 27, 26, 26, 24, 26, 28,      25, 30, 26)), .Names = c("year", "mean"), row.names = c(NA,  -90L), class = "data.frame"), `V6-R0` = structure(list(year = 2011:2100,      mean = c(30, 33, 33, 32, 29, 30, 32, 31, 32, 30, 33, 30,      32, 33, 33, 32, 29, 31, 32, 28, 31, 29, 36, 29, 30, 30, 33,      31, 33, 30, 34, 32, 29, 31, 28, 30, 30, 29, 34, 31, 32, 31,      30, 28, 32, 29, 29, 32, 29, 28, 29, 29, 32, 31, 27, 32, 29,      31, 29, 29, 30, 29, 29, 29, 28, 28, 30, 30, 30, 32, 29, 29,      30, 29, 29, 29, 28, 28, 29, 30, 29, 29, 29, 30, 28, 30, 30,      29, 29, 29)), .Names = c("year", "mean"), row.names = c(NA,  -90L), class = "data.frame"), `R0-R4` = structure(list(year = 2011:2100,      mean = c(31, 32, 32, 33, 32, 32, 33, 31, 34, 32, 33, 33,      32, 31, 33, 31, 32, 32, 32, 30, 32, 31, 34, 30, 31, 32, 34,      33, 34, 32, 36, 33, 32, 32, 31, 30, 32, 32, 32, 32, 32, 32,      31, 30, 30, 31, 32, 32, 30, 30, 32, 31, 31, 32, 30, 32, 29,      32, 31, 30, 32, 30, 30, 31, 32, 30, 31, 30, 31, 32, 
31, 31,      30, 30, 30, 31, 30, 30, 31, 30, 31, 30, 30, 30, 31, 32, 30,      31, 30, 30)), .Names = c("year", "mean"), row.names = c(NA,  -90L), class = "data.frame"), `R4-R9` = structure(list(year = 2011:2100,      mean = c(27, 29, 28, 28, 27, 30, 29, 27, 30, 26, 30, 28,      29, 28, 29, 27, 29, 28, 25, 26, 26, 25, 27, 27, 27, 28, 30,      28, 29, 27, 29, 28, 29, 28, 26, 26, 28, 28, 30, 28, 27, 25,      26, 25, 25, 26, 26, 27, 25, 25, 26, 25, 27, 28, 24, 27, 25,      28, 26, 24, 27, 26, 27, 25, 26, 26, 24, 26, 25, 26, 24, 25,      25, 26, 26, 25, 25, 25, 25, 25, 26, 25, 25, 25, 25, 26, 26,      26, 25, 24)), .Names = c("year", "mean"), row.names = c(NA,  -90L), class = "data.frame")), .Names = c("EM-V6", "V6-R0", "R0-R4",  "R4-R9"))), .Names = c("exp1.1", "exp1.2", "exp2.1", "exp2.2" )) 

What I need to do is to "unlist" this nested list to a data frame that will look like this:

YEAR   EXP     EM-V6   V6-R0   R0-R4   R4-R9
2011   exp1.1  34      30      31      27
2011   exp1.2  34      30      31      27
2011   exp2.1  34      30      31      27
2011   exp2.2  34      30      31      27

Which means:

 - first year, first experiment and growth stages
 - first year, second experiment and growth stages
 - first year, third experiment and growth stages
 - first year, fourth experiment and growth stages
 - second year, first experiment and growth stages

and so on.

How to perform that data transformation?

回答1:

An alternative using rbindlist from the data.table-package twice:

library(data.table)

# Step 1: within each experiment, stack the four growth-stage data frames.
# `idcol` stores the name of each inner list element (the growth stage) in a
# 'stages' column; the result is a list with one data.table per experiment.
step1 <- lapply(mylist, rbindlist, idcol = "stages")

# Step 2: stack the per-experiment tables, storing the name of each outer
# list element (the experiment) in an 'experiment' column.
step2 <- rbindlist(step1, idcol = "experiment")

# Step 3: reshape to wide format -- one row per year/experiment pair and one
# column per growth stage, filled with the 'mean' values.
dcast(step2, year + experiment ~ stages, value.var = "mean")

Or in one go:

# Same three steps as a single expression: bind the stages inside each
# experiment, bind the experiments together, then cast to wide format.
dcast(
  rbindlist(
    lapply(mylist, rbindlist, idcol = "stages"),
    idcol = "experiment"
  ),
  year + experiment ~ stages,
  value.var = "mean"
)

which gives:

     year experiment EM-V6 R0-R4 R4-R9 V6-R0
  1: 2011     exp1.1    34    31    27    30
  2: 2011     exp1.2    34    31    27    30
  3: 2011     exp2.1    34    31    27    30
  4: 2011     exp2.2    34    31    27    30
  5: 2012     exp1.1    34    32    29    33
 ---
356: 2099     exp2.2    30    30    25    29
357: 2100     exp1.1    26    30    24    29
358: 2100     exp1.2    26    30    24    29
359: 2100     exp2.1    26    30    24    29
360: 2100     exp2.2    26    30    24    29


回答2:

Alternate tidyverse:

library(tidyverse)

mylist %>%
  # stack the growth-stage data frames of each experiment; the inner list
  # names (the stages) go into an 'id' column
  map(~ bind_rows(.x, .id = "id")) %>%
  # stack the experiments; the outer list names go into an 'EXP' column
  bind_rows(.id = "EXP") %>%
  # one column per growth stage, filled with the 'mean' values.
  # pivot_wider() replaces the superseded spread() and keeps the stage
  # columns in their order of first appearance (EM-V6, V6-R0, R0-R4, R4-R9)
  # instead of sorting them alphabetically.
  pivot_wider(names_from = id, values_from = mean)


回答3:

We can use tidyverse with more compact and readable code

library(dplyr)
library(tidyr)
library(purrr)

res1 <- mylist %>%
  # bind the inner datasets and create an id column (growth-stage name)
  map(bind_rows, .id = "id") %>%
  # bind the outer datasets and create an EXP column (experiment name)
  bind_rows(.id = "EXP") %>%
  # reshape to wide format: one column per growth stage.
  # NOTE: spread() is superseded in tidyr; the current equivalent is
  # pivot_wider(names_from = id, values_from = mean). It is kept here so the
  # output shown below (alphabetically ordered stage columns) stays accurate.
  spread(id, mean)

head(res1, 4)
#     EXP year EM-V6 R0-R4 R4-R9 V6-R0
#1 exp1.1 2011    34    31    27    30
#2 exp1.1 2012    34    32    29    33
#3 exp1.1 2013    32    32    28    33
#4 exp1.1 2014    28    33    28    32

Or we can approach this in base R. Loop through mylist with lapply; for each experiment, use Map to cbind a new column 'name' holding the names of the inner list elements, and rbind those elements with do.call(rbind, ...). Then use a second Map to add an 'EXP' column based on the names of 'mylist', rbind the results, and finally use reshape from base R to convert the data to 'wide' format.

# For each experiment: tag every growth-stage data frame with its stage name
# in a 'name' column, then stack the stages into one long data frame.
stacked_stages <- lapply(mylist, function(x) {
  do.call(rbind, Map(cbind, x, name = names(x)))
})

# Tag each experiment's data frame with the experiment name in an 'EXP'
# column and stack all experiments.
res <- do.call(rbind, Map(cbind, stacked_stages, EXP = names(mylist)))

# Long -> wide: one row per year/EXP pair; the remaining 'mean' column is
# spread into one 'mean.<stage>' column per value of 'name'.
res2 <- reshape(res, idvar = c("year", "EXP"),
                timevar = "name", direction = "wide")

# Drop the row names inherited from the stacked long data.
row.names(res2) <- NULL

head(res2, 4)
#   year    EXP mean.EM-V6 mean.V6-R0 mean.R0-R4 mean.R4-R9
#1 2011 exp1.1         34         30         31         27
#2 2012 exp1.1         34         33         32         29
#3 2013 exp1.1         32         33         32         28
#4 2014 exp1.1         28         32         33         28

NOTE: No external packages used (100% base R)

or use dcast from reshape2 to transform to 'wide' format

library(reshape2)

# Pin the growth-stage order to the order of appearance in 'res'. Under
# R >= 4.0 cbind() leaves 'name' as a character column, and the stage columns
# would then come out sorted alphabetically instead of in the order shown
# below.
res_ordered <- res
res_ordered$name <- factor(res_ordered$name, levels = unique(res_ordered$name))

# Long -> wide: one row per year/EXP pair, one column per growth stage.
res2 <- dcast(res_ordered, year + EXP ~ name, value.var = "mean")

head(res2, 4)
#   year    EXP EM-V6 V6-R0 R0-R4 R4-R9
#1 2011 exp1.1    34    30    31    27
#2 2011 exp1.2    34    30    31    27
#3 2011 exp2.1    34    30    31    27
#4 2011 exp2.2    34    30    31    27


标签
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!