I have a data frame, which I read with Match <- read.table("Match.txt", sep="", fill =T, stringsAsFactors = FALSE, quote = "", header = F), and it looks like
Maybe not the best use of stringr or tidyr, but this can be done in the hadleyverse in a somewhat readable manner...
The logic flow is:
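1. Tag each "Inspecting" row with its row name using ifelse(V1 == "Inspecting", rowname, NA), then use tidyr::fill to carry that tag down so every row belongs to a block.
2. Extract chr, start and stop from V4 and fill them down within each block.
3. Reshape with reshape2::dcast to get the format that you want.
library(dplyr)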
library(tidyr)
library(reshape2)
library(stringr)
# Presence flag, used as the aggregation function passed to dcast().
#
# v1part: the values that landed in one cell of the reshaped table.
# Returns "B" when the cell holds at least one value and "U" when it is
# empty (length zero).
is_in <- function(v1part) {
  # The condition is a single logical, so plain if/else is used instead of
  # the vectorised ifelse().
  if (length(v1part) > 0) "B" else "U"
}
# Reshape the raw match table `ab` into one row per chr/start/stop region and
# one column per distinct V1part value; each cell is the "B"/"U" flag returned
# by is_in().
# NOTE(review): `ab` is not defined in this snippet -- presumably it is the
# data frame produced by read.table(); confirm against the caller.
ab1 <- ab %>%
  # NOTE: add_rownames() is deprecated in recent dplyr releases in favour of
  # tibble::rownames_to_column(); kept here so no extra package is needed.
  add_rownames() %>%
  mutate(
    # Keep the row name only on "Inspecting" rows; it becomes the block id.
    rowname = ifelse(V1 == "Inspecting", rowname, NA),
    # Blank out strand markers so only coordinate strings are split below.
    V4a = ifelse(V4 == "(-)" | V4 == "(+)", NA, V4),
    # str_extract() returns NA for rows without a match, so no "" -> NA
    # clean-up is needed, and it does not fail when no row matches at all.
    # Uses the piped column V4 rather than reaching back to ab$V4.
    chr = str_extract(V4, "^chr[^:]+"),
    start = str_split_fixed(V4a, ":|-", 3)[, 2],
    start = ifelse(start == "", NA, start),
    stop = str_split_fixed(V4a, ":|-", 3)[, 3],
    stop = ifelse(stop == "", NA, stop),
    # Second piece of V1 when split on "$" or "_"; "" when V1 has neither.
    # NOTE(review): those "" values are presumably what produces the extra
    # unnamed column (shown as Var.4) in the result -- confirm.
    V1part = str_split_fixed(V1, "\\$|_", 3)[, 2]
  ) %>%
  # Propagate the block id to every row that follows an "Inspecting" row.
  fill(rowname, .direction = "down") %>%
  group_by(rowname) %>%
  # Within each block, carry the coordinates down to the rows below them.
  fill(chr, start, stop, .direction = "down") %>%
  ungroup() %>%
  # value.var is spelled out instead of letting dcast() guess the last column.
  dcast(
    chr + start + stop ~ V1part,
    fun.aggregate = is_in,
    value.var = "V1part"
  )
> ab1
chr start stop Var.4 ATF3 CEBPB YY1
1 chr1 173244300 173244500 B B B B
2 chr1 173244350 173244550 B B B U