R: Adding NAs into Data Frame

后端 未结 5 880
刺人心
刺人心 2020-12-11 18:04

I have a data frame like so:

Name   Position   Value
a         1        0.2
a         3        0.4
a         4        0.3
b         1        0.5
b         2          


        
相关标签:
5条回答
  • 2020-12-11 18:19

    You can use the reshape2 package:

    # Build the sample data frame; some Name/Position combinations are absent.
    df <- read.table(text = "Name   Position   Value
    a         1        0.2
    a         3        0.4
    a         4        0.3
    b         1        0.5
    b         2        0.4
    b         5        0.3
    c         2        0.3
    c         3        0.4
    c         5        0.1
    d         1        0.2
    d         2        0.4
    d         3        0.5", header = TRUE, stringsAsFactors = FALSE)
    
    library(reshape2)
    # Cast to wide form: one row per Name, one column per Position. Combinations
    # that do not occur in df become NA cells. value.var is given explicitly so
    # dcast() does not have to guess which column holds the values.
    df2 <- dcast(df, Name ~ Position, value.var = "Value")
    # Melt back to long form, which keeps the NA cells as rows. id.vars is given
    # explicitly so melt() does not have to guess the identifier column.
    df3 <- melt(df2, id.vars = "Name", variable.name = "Position",
                value.name = "Value")
    # Show the rows grouped by Name.
    df3[order(df3$Name), ]
    #    Name Position Value
    # 1     a        1   0.2
    # 5     a        2    NA
    # 9     a        3   0.4
    # 13    a        4   0.3
    # 17    a        5    NA
    # 2     b        1   0.5
    # 6     b        2   0.4
    # 10    b        3    NA
    # 14    b        4    NA
    # 18    b        5   0.3
    # 3     c        1    NA
    # 7     c        2   0.3
    # 11    c        3   0.4
    # 15    c        4    NA
    # 19    c        5   0.1
    # 4     d        1   0.2
    # 8     d        2   0.4
    # 12    d        3   0.5
    # 16    d        4    NA
    # 20    d        5    NA
    
    0 讨论(0)
  • 2020-12-11 18:20

    Here are a couple of base solutions:

    # Cross-tabulate the third column of df by its second and first columns;
    # cells with no observation are NA. The table method then unrolls the
    # array back into one row per cell.
    value_grid <- tapply(df[[3]], df[2:1], c)
    as.data.frame.table(value_grid)
    

    and

    # Every Position/Name pair that could occur, built from the observed values.
    full_grid <- expand.grid(Position = unique(df$Position),
                             Name = unique(df$Name))
    # all = TRUE keeps grid rows that have no match in df; their Value is NA.
    merge(df, full_grid, all = TRUE)
    
    0 讨论(0)
  • 2020-12-11 18:24

    Maybe it is overkill, but I think you can use sqldf to do this:

    library(sqldf)
    # The data frame from the question:
    df <- data.frame(
      name = rep(c("a", "b", "c", "d"), each = 3),
      position = c(1, 3, 4, 1, 2, 5, 2, 3, 5, 1, 2, 3),
      value = c(0.2, 0.4, 0.3, 0.5, 0.4, 0.3, 0.3, 0.4, 0.1, 0.2, 0.4, 0.5)
    )
    # A data frame holding the positions that every name should have:
    pos <- data.frame(pos = 1:5)
    # sqldf lets you write SQL statements that treat data frames as SQL tables.
    # The inner query pairs every distinct name with every position; the left
    # join then attaches the value where one exists and leaves it NULL (NA in R)
    # otherwise.
    fill_query <- "select a.*, b.value as value
      from (
        select a.name, p.pos as position 
        from (select distinct name from df) as a, pos as p
      ) as a
      left join df as b on a.name = b.name and a.position = b.position"
    df2 <- sqldf(fill_query)
    df2
    ## Result:
    ##   name position value
    ##1     a        1   0.2
    ##2     a        2    NA
    ##3     a        3   0.4
    ##4     a        4   0.3
    ##5     a        5    NA
    ##6     b        1   0.5
    ##7     b        2   0.4
    ##8     b        3    NA
    ##9     b        4    NA
    ##10    b        5   0.3
    ##11    c        1    NA
    ##12    c        2   0.3
    ##13    c        3   0.4
    ##14    c        4    NA
    ##15    c        5   0.1
    ##16    d        1   0.2
    ##17    d        2   0.4
    ##18    d        3   0.5
    ##19    d        4    NA
    ##20    d        5    NA
    

    Of course, you can assign the result of sqldf() directly to df to overwrite the original data frame.

    0 讨论(0)
  • 2020-12-11 18:26

    I would use data.table, but in a different way than the one @akrun outlined:

    library(data.table)
    dt <- as.data.table(df)
    # Key the table on both identifier columns so the join below matches on them.
    setkeyv(dt, c("Name", "Position"))
    # CJ() builds every Name/Position pair; joining dt to it keeps the existing
    # rows and yields NA values for the pairs that were missing.
    dt[CJ(unique(Name), unique(Position))]
    
    0 讨论(0)
  • 2020-12-11 18:33

    You could use data.table

     library(data.table)
     DT <- data.table(df)
     # Key on Position so that J(1:5) below is matched against that column.
     setkey(DT, Position)
     # For each Name, join that group's rows (.SD) to positions 1 through 5;
     # roll=FALSE requests exact matches only, so positions missing for a Name
     # come back with NA. The second [ ] sorts the result by Name and Position.
     # NOTE(review): this relies on .SD carrying DT's key inside the grouped
     # call -- confirm with the data.table version in use.
     DT[, .SD[J(1:5), roll=FALSE], by=Name][order(Name, Position),]
     #   Name Position Value
     #1:    a        1   0.2
     #2:    a        2    NA
     #3:    a        3   0.4
     #4:    a        4   0.3
     #5:    a        5    NA
     #6:    b        1   0.5
     #7:    b        2   0.4
     #8:    b        3    NA
     #9:    b        4    NA
    #10:    b        5   0.3
    #11:    c        1    NA
    #12:    c        2   0.3
    #13:    c        3   0.4
    #14:    c        4    NA
    #15:    c        5   0.1
    #16:    d        1   0.2
    #17:    d        2   0.4
    #18:    d        3   0.5
    #19:    d        4    NA
    #20:    d        5    NA
    

    Or you can use tidyr/dplyr

     library(dplyr)
     library(tidyr)
    
     # Add the missing Name/Position combinations directly; Value is NA in the
     # rows that complete() inserts. Unlike a spread()/gather() round trip
     # (both superseded), Position is never turned into column names, so it
     # keeps its integer type and arrange() sorts it numerically.
     df %>%
       complete(Name, Position = 1:5) %>%
       arrange(Name, Position)
    

    data

     # Sample data: four names with three observed positions each (twelve rows).
     # stringsAsFactors = FALSE keeps Name as character on every R version.
     df <- data.frame(
       Name = rep(c("a", "b", "c", "d"), each = 3L),
       Position = c(1L, 3L, 4L, 1L, 2L, 5L, 2L, 3L, 5L, 1L, 2L, 3L),
       Value = c(0.2, 0.4, 0.3, 0.5, 0.4, 0.3, 0.3, 0.4, 0.1, 0.2, 0.4, 0.5),
       stringsAsFactors = FALSE
     )
    
    0 讨论(0)
提交回复
热议问题