data frame lookup value in range and return different column

泄露秘密 提交于 2019-12-01 07:04:31

Perhaps you can use foverlaps from the "data.table" package.

library(data.table)

# Work on data.table versions of the two input data frames.
DT1 <- data.table(DF1)
DT2 <- data.table(DF2)

# foverlaps() expects the lookup table to be keyed, with the interval
# columns (start, end) as the last two key columns.
setkey(DT2, ID, start, end)

# Represent each position as the zero-width interval [pos, pos] so it can
# be compared against the start/end intervals in DT2.
DT1[, `:=`(start = pos, end = pos)]

# Compute the overlap join once and reuse it below.
overlaps <- foverlaps(DT1, DT2)
overlaps
#     ID start  end annot pos i.start i.end
# 1: chr     1  200    a1  12      12    12
# 2: chr   540 1002    a3 542     542   542
# 3: chr   540 1002    a3 674     674   674

# Keep only the identifier, the position and the matched annotation.
overlaps[, .(ID, pos, annot)]
#     ID pos annot
# 1: chr  12    a1
# 2: chr 542    a3
# 3: chr 674    a3

As mentioned by @Arun in the comments, you can also use which = TRUE in foverlaps to extract the relevant values:

# With which = TRUE, foverlaps() returns row indices instead of joined rows:
# xid is the row number in DT1, yid the overlapping row number in DT2.
foverlaps(DT1, DT2, which = TRUE)
#    xid yid
# 1:   1   1
# 2:   2   3
# 3:   3   3

# mult = "first" yields exactly one DT2 row index per DT1 row (NA when a
# position falls in no interval), so the extracted annotations stay aligned
# with DT1 even if a position overlaps several intervals.
DT2$annot[foverlaps(DT1, DT2, which = TRUE, mult = "first")]
# [1] "a1" "a3" "a3"

You could also use the IRanges package from Bioconductor:

# biocLite() has been retired; Bioconductor packages are now installed
# through the BiocManager package from CRAN. This also avoids sourcing a
# remote script over plain HTTP.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
BiocManager::install("IRanges")
library(IRanges)

# Positions become zero-width ranges; annotations become [start, end] ranges.
DF1N <- with(DF1, IRanges(pos, pos))
DF2N <- with(DF2, IRanges(start, end))

# select = "first" returns one subject index per query (NA when nothing
# overlaps), so the result always has one entry per row of DF1. Extracting
# subjectHits() from the full hit list would drop unmatched positions and
# repeat multiply-matched ones, breaking the row alignment.
# NOTE(review): the ID column is not part of this match; if the data hold
# more than one ID, the ranges presumably need to be matched per ID - confirm.
DF1$name <- DF2$annot[findOverlaps(DF1N, DF2N, select = "first")]
DF1
#   ID pos name
#1 chr  12   a1
#2 chr 542   a3
#3 chr 674   a3
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!