I have 10 text files containing 1000\'s of rows.
Example: First File:
V1 V2
1 2
2 3
10 20
1 4
.....
Second file:
<For simplicity, imagine that you have only four files with these numbers:
m <- cbind(1, c(1,2,3,5), c(2,3,4,6))
m <- rbind(m, cbind(2, c(2,3,5), c(3,6,1)))
m <- rbind(m, cbind(3, c(1,2), c(4,6)))
m <- rbind(m, cbind(4, c(2,2,4), c(4,6,5)))
colnames(m) <- c("file", "val1", "val2")
m
file val1 val2
[1,] 1 1 2
[2,] 1 2 3
[3,] 1 3 4
[4,] 1 5 6
[5,] 2 2 3
[6,] 2 3 6
[7,] 2 5 1
[8,] 3 1 4
[9,] 3 2 6
[10,] 4 2 4
[11,] 4 2 6
[12,] 4 4 5
Then you can get a table of the counts of each pair in each file:
vals <- apply(m[,2:3], 1, paste, collapse = "")
m <- cbind(m[,1], vals)
tab <- table(m[,1], m[,2])
tab
12 14 23 24 26 34 36 45 51 56
1 1 0 1 0 0 1 0 0 0 1
2 0 0 1 0 0 0 1 0 1 0
3 0 1 0 0 1 0 0 0 0 0
4 0 0 0 1 1 0 0 1 0 0
Now you can format the output
res <- cbind(colnames(tab), apply(tab, 2, paste, collapse = ""))
res <- as.vector(apply(res, 1, paste, collapse = " "))
res
[1] "12 1000" "14 0010" "23 1100" "24 0001" "26 0011" "34 1000" "36 0100"
[8] "45 0001" "51 0100" "56 1000"
cat(paste(res, collapse = "\n"))
12 1000
14 0010
23 1100
24 0001
26 0011
34 1000
36 0100
45 0001
51 0100
56 1000
Here's one way to do it. I'll assume that you've already read your files into a list
of data.frames
, e.g. with L <- lapply(filenames, read.table)
. I'll simulate this L
below.
# Create dummy data
L <- replicate(5, expand.grid(1:10, 1:10)[sample(100, 10), ], simplify=FALSE)
# Add a column to each data.frame in L.
# This will indicate presence of the pair when we merge.
L <- lapply(seq_along(L), function(i) {
L[[i]][, paste0('DF', i)] <- 1
L[[i]]
})
# Merge all the things
# (hat-tip to @Charles - https://stackoverflow.com/a/8097519/489704)
L.merged <- Reduce(function(...) merge(..., all=T), L)
head(L.merged)
# Var1 Var2 DF1 DF2 DF3 DF4 DF5
# 1 1 2 NA NA NA 1 NA
# 2 1 5 1 NA NA NA 1
# 3 1 9 NA NA NA 1 NA
# 4 1 10 NA NA 1 1 NA
# 5 2 5 NA NA 1 NA NA
# 6 2 6 NA 1 NA 1 NA
Easy enough to convert the NA
to 0
if you want, e.g. with:
L.merged[is.na(L.merged)] <- 0
Relevant post: Merge multiple data frames in a list simultaneously
Try this example:
#dummy files
file1 <- read.table(text="V1 V2
1 2
2 3
10 20
1 4 ",header=TRUE)
file2 <- read.table(text="
V1 V2
1 2
8 10",header=TRUE)
#make list of files - this can be done by lapply(list.files(),read.table)
all_files <- list(file1,file2)
#get unique pairs
unique_pairs <- unique(do.call(rbind,all_files))
#add new pair that is not in file1 and file2
unique_pairs <- rbind(unique_pairs,c(33,22))
#loop through pairs and files
result <- sapply(1:nrow(unique_pairs),
function(x)
sapply(all_files,function(myfile){
#merge pair with file then check number of rows
d <- merge(myfile,unique_pairs[x,,drop=FALSE])
ifelse(nrow(d)==0,0,1)
}))
#output
cbind(unique_pairs,t(result))
# V1 V2 1 2
# 1 1 2 1 1
# 2 2 3 1 0
# 3 10 20 1 0
# 4 1 4 1 0
# 6 8 10 0 1
# 61 33 22 0 0