问题
I have a data frame of plant (`plantsp`) and herbivore (`lepsp`) species and their interactions (`int1` and `int2`), with sampling nested in `site`, `season` and `group`. I wish to create a loop that makes pairwise comparisons among each level of `group` collected within each `site` and `season` subset. For each pairwise comparison I will calculate the total MATCHING and UNIQUE interactions among `int1` and `int2`. I have devised the following steps to break down this problem:
Consider the following example data frame df
:
# Build one 8-row example subset for a given site/season combination.
# Every subset shares the same group, plant, herbivore and parasitoid columns;
# only `site` and `season` differ between them.
make_subset <- function(site, season) {
  data.frame(
    site = rep(site, 8),
    season = rep(season, 8),
    group = c(1, 1, 1, 2, 2, 3, 3, 3),
    plantsp = c("P1", "P1", "P2", "P1", "P2", "P1", "P2", "P2"),
    lepsp = c("L3", "L1", "L2", "L1", "L2", "L1", "L1", "L2"),
    psitsp = c(NA, "psit1", NA, NA, NA, NA, NA, NA)
  )
}

sub <- make_subset(1, "wet")
sub2 <- make_subset(1, "dry")
sub3 <- make_subset(2, "wet")
sub4 <- make_subset(2, "dry")

# Stack the four subsets, then derive the two interaction labels:
# `int1` = plant_herbivore, `int2` = herbivore_parasitoid.
df <- rbind(sub, sub2, sub3, sub4)
df$int1 <- paste(df$plantsp, df$lepsp, sep = "_")
df$int2 <- paste(df$lepsp, df$psitsp, sep = "_")
df
Step 1: Subset df
by site
and season
. Example:
# Step 1: one data frame per site/season combination.
sub1 <- split(df, df[c("site", "season")])
sub1
Step 2: Subset df
by group
. Example:
# Step 2: split the first site/season subset into one data frame per group.
sub2 <- split(sub1[[1]], sub1[[1]]$group)
sub2
Step 3: We will call each list element in sub2
a group. Example:
# Group 1: a one-element list holding the rows of group "1".
group1 <- sub2["1"]
group1
# Group 2: a one-element list holding the rows of group "2".
group2 <- sub2["2"]
group2
Step 4: I want to make pairwise comparisons among each `group`. For each pairwise comparison I want to create vectors that summarize counts of UNIQUE and MATCHING elements among `int1` and `int2`. This will be iterated through `df` for all possible pairwise comparisons among all groups for all subsets. Example for `group1` and `group2`:
# Worked example for the first two groups in `sub2` (group 1 vs group 2).

# CALCULATE MATCHING ELEMENTS
# Count matches in `int1` among both levels of `group`
match1 <- length(intersect(sub2[[1]]$int1, sub2[[2]]$int1)) # P1_L1 & P2_L2
match1
# Count matches in `int2` among both levels of `group`. Rows containing NAs
# are dropped first so that incomplete interactions are not counted.
temp <- lapply(sub2, na.omit)
temp
match2 <- length(intersect(temp[[1]]$int2, temp[[2]]$int2))
match2
# SUM `match1` and `match2` and put result into vector called `vecA`.
# `vecA`: represents vector of sums of the counts of MATCHING items in
# both groups within `int1` AND `int2` columns.
vecA <- sum(match1, match2)
vecA

# CALCULATE UNIQUE ELEMENTS TO GROUP1
# Count items in `int1` that occur in group 1 but not in group 2
unique_int1 <- length(setdiff(sub2[[1]]$int1, sub2[[2]]$int1)) # P1_L3
# Count items in `int2` (complete rows only) that occur in group 1 but not
# in group 2
unique_int2 <- length(setdiff(temp[[1]]$int2, temp[[2]]$int2)) # L1_psit1
# SUM `unique_int1` and `unique_int2` and put result into vector called
# `vecB`. `vecB`: represents vector of sums of `int1` AND `int2` that
# are UNIQUE to `group1` in the pairwise comparison
vecB <- sum(unique_int1, unique_int2)
vecB

# CALCULATE UNIQUE ELEMENTS TO GROUP2
# Count items in `int1` that occur in group 2 but not in group 1
unique_int1 <- length(setdiff(sub2[[2]]$int1, sub2[[1]]$int1)) # none
# Count items in `int2` (complete rows only) that occur in group 2 but not
# in group 1
unique_int2 <- length(setdiff(temp[[2]]$int2, temp[[1]]$int2)) # none
# SUM `unique_int1` and `unique_int2` and put result into vector
# called `vecC`. `vecC`: represents vector of sums of `int1` AND `int2`
# that are UNIQUE to `group2` in the pairwise comparison
vecC <- sum(unique_int1, unique_int2)
vecC
The expected result for all pairwise comparisons for all subsets given df
and the steps above is:
# Expected output: one row per site/season subset and group pair
# (4 subsets x 3 pairs = 12 rows).
result1 <- data.frame(
  site = rep(c(1, 2), each = 6),
  season = rep(rep(c("wet", "dry"), each = 3), times = 2),
  group_pairs = rep(c("1_2", "2_3", "1_3"), times = 4),
  vecA = rep(2, 12),
  vecB = rep(c(2, 0, 2), times = 4),
  vecC = rep(c(0, 1, 0), times = 4)
)
Step 5: Conduct steps above but ONLY for species present in BOTH levels of group
.
# Worked example for the first two groups in `sub2`, restricted to species
# that both groups share.
# NOTE(review): the UNIQUE counts below are hand-entered for this example,
# not computed; the exact filtering rule still has to be implemented.

# CALCULATE MATCHING ELEMENTS
# If `plantsp` OR `lepsp` match among both levels of `group`, count matches in `int1`.
match1 <- length(intersect(sub2[[1]]$int1, sub2[[2]]$int1))
match1
# If `lepsp` OR `psitsp` match among both levels of `group`, count matches in `int2`. Remove NAs
temp <- lapply(sub2, na.omit)
temp
match2 <- length(intersect(temp[[1]]$int2, temp[[2]]$int2))
match2
# SUM `match1` and `match2` above and put result into vector called `vecD`.
# `vecD`: vector of sums of MATCHING items in `int1` and `int2` after
# subsetting for those species both levels of group share.
vecD <- sum(match1, match2)

# CALCULATE UNIQUE ELEMENTS TO GROUP1
# If `plantsp` OR `lepsp` match among both levels of `group`, count unique
# items in `int1`. This is represented by the P1_L3 interaction in `int1`
unique_int1 <- 1
# If `lepsp` and `psitsp` match among both levels of `group`, count unique
# items in `int2`. This is represented by the L1_psit1 interaction in `int2`
unique_int2 <- 1
# SUM `unique_int1` and `unique_int2` above and put result into vector called
# `vecE`. `vecE`: vector of sums of UNIQUE items to the FIRST level of `group`
# included in the pairwise comparison after subsetting for those species both
# levels of group share.
vecE <- sum(unique_int1, unique_int2)

# CALCULATE UNIQUE ELEMENTS TO GROUP2
# If `plantsp` OR `lepsp` match among both levels of `group`, count unique
# items in `int1`.
unique_int1 <- 0
# If `lepsp` and `psitsp` match among both levels of `group`, count unique
# items in `int2`.
unique_int2 <- 0
# SUM `unique_int1` and `unique_int2` above and put result into vector called
# `vecF`. `vecF`: vector of sums of UNIQUE items to the SECOND level of `group`
# included in the pairwise comparison after subsetting for those species both
# levels of group share.
# (Stored in `vecF`, so the group-1 result in `vecE` is not overwritten.)
vecF <- sum(unique_int1, unique_int2)
The expected result for all pairwise comparisons for all subsets given df
and the steps above is:
# Expected output after restricting to shared species: one row per
# site/season subset and group pair (4 subsets x 3 pairs = 12 rows).
result2 <- data.frame(
  site = rep(c(1, 2), each = 6),
  season = rep(rep(c("wet", "dry"), each = 3), times = 2),
  group_pairs = rep(c("1_2", "2_3", "1_3"), times = 4),
  vecD = rep(2, 12),
  vecE = rep(0, 12),
  vecF = rep(c(0, 1, 1), times = 4)
)
A similar question is posted here, however this approach is unique for all pairwise comparisons among groups.
回答1:
I'm afraid I can't follow all the steps of this question, but I hope this gets you started.
Here's a way to get all the pairwise matches of int1
between different group
s within each site
+ season
. This is accomplished by joining a list of all the existing site
/ season
/ group
/ int1
combinations with itself. That way we get a row for every pair of groups with matching site/season/int1. Then we can limit those to non-matching groups in ascending order, and count the number of rows that are produced for each set we're tracking. The last unite
step renames the two group columns into one.
library(tidyverse)
# Unique site/season/group/interaction combinations.
temp <- df %>%
  distinct(site, season, group, int1)

# Self-join on site/season/int1 so that every row pairs two groups sharing an
# interaction. Keep each unordered pair once (group.x < group.y), count the
# shared interactions per pair, then merge the two group ids into one label.
temp %>%
  left_join(temp, by = c("site", "season", "int1")) %>%
  filter(group.x < group.y, !is.na(int1)) %>%
  count(site, season, group.x, group.y, name = "vecD") %>%
  unite(group_pairs, c(group.x, group.y))
# A tibble: 12 x 4
site season group_pairs vecD
<dbl> <fct> <chr> <int>
1 1 wet 1_2 2
2 1 wet 1_3 2
3 1 wet 2_3 2
4 1 dry 1_2 2
5 1 dry 1_3 2
6 1 dry 2_3 2
7 2 wet 1_2 2
8 2 wet 1_3 2
9 2 wet 2_3 2
10 2 dry 1_2 2
11 2 dry 1_3 2
12 2 dry 2_3 2
回答2:
Here's a slightly different approach using data.table.
library(data.table)
dt <- as.data.table(df)
# For every site/season subset, enumerate all pairs of groups and count the
# interactions the two groups have in common.
dt[,
  {
    # Each column of `groups` holds one pair of group ids.
    groups <- combn(unique(group), 2)
    group_pairs <- apply(groups, 2, paste, collapse = "_")
    # Only rows with a recorded parasitoid contribute to `int2` matches.
    has_psit <- !is.na(psitsp)
    # vecA = shared `int1` items + shared (non-missing) `int2` items.
    # Comparing the interaction columns, rather than the group ids themselves,
    # keeps the count tied to the data instead of being 2 for every pair.
    vecA <- apply(groups, 2, FUN = function(x) {
      in_first <- group == x[1]
      in_second <- group == x[2]
      shared_int1 <- intersect(int1[in_first], int1[in_second])
      shared_int2 <- intersect(
        int2[in_first & has_psit],
        int2[in_second & has_psit]
      )
      length(shared_int1) + length(shared_int2)
    })
    #apply(groups, 2, function(x) .SD[group %in% x, print(.SD)])
    list(group_pairs = group_pairs, vecA = vecA)
  },
  by = .(site, season)]
site season group_pairs vecA
1: 1 wet 1_2 2
2: 1 wet 1_3 2
3: 1 wet 2_3 2
4: 1 dry 1_2 2
5: 1 dry 1_3 2
6: 1 dry 2_3 2
7: 2 wet 1_2 2
8: 2 wet 1_3 2
9: 2 wet 2_3 2
10: 2 dry 1_2 2
11: 2 dry 1_3 2
12: 2 dry 2_3 2
Note that for `vecA` I borrowed your code. Unfortunately, your code doesn't have similar explanations for `vecB` and so on. It simply states `unique_int1 <- 1; unique_int2 <- 1; vecB<-sum(unique_int1, unique_int2)` with no equation.
Here's the data itself for group1
:
> group1
$`1`
site season group plantsp lepsp psitsp int1 int2
1 1 wet 1 P1 L3 <NA> P1_L3 L3_NA
2 1 wet 1 P1 L1 psit1 P1_L1 L1_psit1
3 1 wet 1 P2 L2 <NA> P2_L2 L2_NA
If you uncomment the `apply` line in my code, you get the following printout (truncated for brevity):
#site 1, season == wet
group plantsp lepsp psitsp int1 int2
1: 1 P1 L3 <NA> P1_L3 L3_NA
2: 1 P1 L1 psit1 P1_L1 L1_psit1
3: 1 P2 L2 <NA> P2_L2 L2_NA
4: 2 P1 L1 <NA> P1_L1 L1_NA
5: 2 P2 L2 <NA> P2_L2 L2_NA
Maybe you can take that apply()
statement and run with it.
来源:https://stackoverflow.com/questions/58295133/return-counts-of-matches-and-unique-items-for-all-pairwise-comparisons-within-su