Reshape a dataframe to long format with multiple sets of measure columns [duplicate]

允我心安 提交于 2019-11-27 09:47:11

If the 'year', 'pop', columns are alternating, we can subset with c(TRUE, FALSE) to get the columns 1, 3, 5,..etc. and c(FALSE, TRUE) to get 2, 4, 6,.. due to the recycling. Then, we unlist the columns and create a new 'data.frame.

 df2 <- data.frame(year=unlist(df1[c(TRUE, FALSE)]), 
                  pop=unlist(df1[c(FALSE, TRUE)]))
 row.names(df2) <- NULL
 head(df2)
 #   year    pop
 #1            
 #2 16XX 4675,0
 #3 17XX 4739,3
 #4 17XX 4834,0
 #5 180X 4930,0
 #6 180X 5029,0

Or another option is

library(splitstackshape)
merged.stack(transform(df1, id=1:nrow(df1)), var.stubs=c('year', 'pop'), 
        sep='var.stubs')[order(.time_1), 3:4, with=FALSE]

data

df1 <- structure(list(year1 = c("", "16XX", "17XX", "17XX", "180X", 
"180X", "181X", "181X", "182X", "182X"), pop1 = c("", "4675,0", 
"4739,3", "4834,0", "4930,0", "5029,0", "5129,0", "5231,9", "5297,0", 
"5362,0"), year2 = c(NA, 1900L, 1901L, 1902L, 1903L, 1904L, 1905L, 
1906L, 1907L, 1908L), pop2 = c("", "6453,0", "6553,5", "6684,0", 
"6818,0", "6955,0", "7094,0", "7234,7", "7329,0", "7422,0"), 
year3 = c(NA, 1930L, 1931L, 1932L, 1933L, 1934L, 1935L, 1936L, 
1937L, 1938L), pop3 = c("", "9981,2", "", "", "", "", "", 
"", "", "")), .Names = c("year1", "pop1", "year2", "pop2", 
"year3", "pop3"), class = "data.frame", row.names = c(NA, -10L))

Using the new feature in melt from data.table v1.9.5+:

require(data.table) # v1.9.5+
melt(setDT(df), measure = patterns("^year", "^pop"), value.name = c("year", "pop"))

You can find the rest of the vignettes here.

Another option is to use split.default to split the dataframe in a list of dataframes and then bind them together:

lst <- lapply(split.default(df1, sub('.*(\\d)', '\\1', names(df1))),
              setNames, c('year','pop'))

do.call(rbind, lst)

which gives the desired result:

    year     pop
1.1 16XX  4675,0
1.2 17XX  4739,3
1.3 17XX  4834,0
1.4 180X  4930,0
1.5 180X  5029,0
1.6 181X  5129,0
1.7 181X  5231,9
1.8 182X  5297,0
1.9 182X  5362,0
2.1 1900  6453,0
2.2 1901  6553,5
2.3 1902  6684,0
2.4 1903  6818,0
2.5 1904  6955,0
2.6 1905  7094,0
2.7 1906  7234,7
2.8 1907  7329,0
2.9 1908  7422,0
3.1 1930  9981,2
3.2 1931 10583,5
3.3 1932  8671,0
3.4 1933  9118,0
3.5 1934  9625,0
3.6 1935  8097,0
3.7 1936  7984,7
3.8 1937  8729,0
3.9 1938 10462,0

You could also use rbindlist from the data.table package for the last step:

library(data.table)
rbindlist(lst)

Used data:

df1 <- structure(list(year1 = c("16XX", "17XX", "17XX", "180X", "180X", "181X", "181X", "182X", "182X"),
                      pop1 = c("4675,0", "4739,3", "4834,0", "4930,0", "5029,0", "5129,0", "5231,9", "5297,0", "5362,0"),
                      year2 = c(1900L, 1901L, 1902L, 1903L, 1904L, 1905L, 1906L, 1907L, 1908L),
                      pop2 = c("6453,0", "6553,5", "6684,0", "6818,0", "6955,0", "7094,0", "7234,7", "7329,0", "7422,0"), 
                      year3 = c(1930L, 1931L, 1932L, 1933L, 1934L, 1935L, 1936L, 1937L, 1938L),
                      pop3 = c("9981,2", "10583,5", "8671,0", "9118,0", "9625,0", "8097,0", "7984,7", "8729,0", "10462,0")),
                 .Names = c("year1", "pop1", "year2", "pop2", "year3", "pop3"), class = "data.frame", row.names = c(NA, -9L))
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!