R: Replace only some table values with values from an alternate table

白昼怎懂夜的黑 提交于 2019-12-02 05:05:23

Assuming you want to replace NAs in onPO with values from Backordered, here is a solution using dplyr::left_join:

library(dplyr)

# Left-join svc onto inv, then fill the gaps in onPO from Backordered.
# The key is named explicitly: without `by`, left_join() joins on every
# column name the two tables share, which silently changes the result if
# another common column is ever added.
left_join(inv, svc, by = "Item") %>%
  # keep existing onPO values; fall back to Backordered only where onPO is NA
  mutate(onPO = ifelse(is.na(onPO), Backordered, onPO)) %>%
  # drop the columns that were brought in from svc
  select(-Backordered, -`Rcv'd`)
#         Item onHand demand onPO
#1    10100200    600   3300 2700
#2    10100201     NA     NA   20
#3    10100202     39     40    1
#4    10100203      0     40   40
#5    10100204     NA     NA  100
#6  10100205-A     NA     NA   18
#7    10100206     40     70   30
#8    10100207      0    126  126
#9    10100208      0     10   10
#10   10100209      0     10   10
#11   10100210      0    250  250

Or a solution in base R using merge:

# Fill NA values of inv$onPO with the Backordered value of the svc row that
# has the same Item.
# match() is used for the lookup instead of merge(): merge() sorts its result
# by the key (and can add rows when svc has duplicate Items), so assigning a
# merged column back onto inv can misalign with inv's rows. match() yields
# exactly one svc index per inv row, in inv's own order (NA when no match).
inv$onPO <- ifelse(
  is.na(inv$onPO),
  svc$Backordered[match(inv$Item, svc$Item)],
  inv$onPO
)

Or using coalesce instead of ifelse (thanks to @thelatemail):

library(dplyr)

# Same join as the ifelse() variant, with the key spelled out so the join
# cannot silently widen to other columns the two tables have in common.
left_join(inv, svc, by = "Item") %>%
  # coalesce() takes the first non-NA value: onPO where present,
  # otherwise Backordered
  mutate(onPO = coalesce(onPO, Backordered)) %>%
  # drop the columns that were brought in from svc
  select(-Backordered, -`Rcv'd`)

In data.table world, this is an "update-join". Join on "Item" and then update the values in the original set with the values from the new set:

library(data.table)

# turn both tables into data.tables
setDT(svc)
setDT(inv)

# Update-join:
#   inv    table being updated (the original set)
#   svc    table supplying the new values
#   on =   column used to match rows between the two
#   `:=`   overwrites, for matched rows only,
#            onPO    with svc's Backordered
#            onHand  with svc's Rcv'd
# The i. prefix picks the column from svc rather than from inv.
inv[svc, on = "Item", `:=`(onPO = i.Backordered, onHand = `i.Rcv'd`)]


#          Item onHand demand onPO
# 1:   10100200    600   3300 2700
# 2:   10100201      0     NA   20
# 3:   10100202     39     40    1
# 4:   10100203      0     40   40
# 5:   10100204      0     NA  100
# 6: 10100205-A     44     NA   18
# 7:   10100206     40     70   30
# 8:   10100207      0    126  126
# 9:   10100208      0     10   10
#10:   10100209      0     10   10
#11:   10100210      0    250  250

Starting with the tables:

  >inv
          Item OnHand Demand OnPO
 1:   10100200    600   3300 2700
 2:   10100201     NA     NA   NA
 3:   10100202     39     40    1
 4:   10100203      0     40   40
 5:   10100204     NA     NA   NA
 6: 10100205-A     NA     NA   NA
 7:   10100206     40     70   30
 8:   10100207      0    126  126
 9:   10100208      0     10   10
10:   10100209      0     10   10
11:   10100210      0    250  250

> svc
         Item Rcv'd Backordered
1:   10100201     0          20
2:   10100204     0         100
3: 10100205-A    44          18

After far more cursing than I'd like to admit, the simple solution that works on both the above test data and my live data proved to be:

# Insert OnHand and OnPO data from svc.
# Vectorized lookup instead of a row-by-row loop:
#   - match() gives, for every inv row, the index of the first svc row with
#     the same Item (NA when there is none), so duplicate Items in svc can no
#     longer produce a multi-element replacement;
#   - incomparables = NA keeps a missing Item from ever counting as a match;
#   - nothing iterates over 1:nrow(inv), so an empty inv is handled cleanly.
svc_idx <- match(inv$Item, svc$Item, incomparables = NA)
has_svc <- !is.na(svc_idx)
inv$OnPO[has_svc] <- svc$Backordered[svc_idx[has_svc]]
inv$OnHand[has_svc] <- svc$`Rcv'd`[svc_idx[has_svc]]

# cleanup: every remaining NA in inv (in any column) becomes 0
inv[is.na(inv)] <- 0

Is there a simpler or more obvious method that I've overlooked?

We could use eat from my package safejoin, and "patch" the matches from the rhs into the lhs when columns conflict.

We rename Backordered to onPO on the way so the two columns conflict as desired.

# devtools::install_github("moodymudskipper/safejoin")
library(safejoin)
library(dplyr)

# Join svc onto inv with eat(). Backordered is renamed to onPO on the way so
# that it conflicts with inv's own onPO column; .conflict = "patch" then
# patches the matched values from svc into inv's column.
# NOTE(review): the join key is not given explicitly and eat()'s exact
# matching / NA handling is not visible here — confirm against the safejoin
# documentation before relying on it.
eat(inv, svc, onPO = Backordered, .conflict = "patch")
#          Item onHand demand onPO
# 1    10100200    600   3300 2700
# 2    10100201     NA     NA   20
# 3    10100202     39     40    1
# 4    10100203      0     40   40
# 5    10100204     NA     NA  100
# 6  10100205-A     NA     NA   18
# 7    10100206     40     70   30
# 8    10100207      0    126  126
# 9    10100208      0     10   10
# 10   10100209      0     10   10
# 11   10100210      0    250  250
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!