问题
I have two lists of gene-lists for example
gene_list_A
$`STEARATE BIOSYNTHESIS I (ANIMALS)%HUMANCYC%PWY-5972`
[1] "ELOVL1" "ACOT7" "ACSL1" "ACSL5" "ACSL4" "ELOVL6" "ACSL3" "ACOT2" "ACOT1" "ACSBG1"
[11] "ACSBG2" "SLC27A2" "ACOT4"
$`SUPERPATHWAY OF INOSITOL PHOSPHATE COMPOUNDS%HUMANCYC%PWY-6371`
[1] "PI4K2B" "MTMR14" "PTEN" "INPPL1" "PIK3CD" "PIK3C2G" "PIK3CB" "PIK3C2A" "PIK3CG" "PIK3C2B"
[11] "PLCZ1" "PPIP5K1" "PPIP5K2" "PIP5KL1" "PLCE1" "PIP4K2A" "PIP4K2B" "PIP4K2C" "SACM1L" "ITPK1"
[21] "IPMK" "OCRL" "ITPKB" "MINPP1" "ITPKC" "PLCB3" "PLCB4" "PIK3CA" "ITPKA" "PIK3C3"
[31] "PLCB1" "PLCB2" "PI4K2A" "TMEM55A" "IPPK" "TMEM55B" "MTMR3" "PIK3R4" "PIK3R3" "PIK3R2"
[41] "PIK3R1" "PIK3R6" "PIK3R5" "INPP5B" "INPP5A" "INPP5D" "INPP5J" "PLCG2" "PIP5K1A" "INPP5K"
[51] "PIP5K1B" "PLCG1" "PIP5K1C" "IP6K1" "CDIPT" "IP6K3" "IP6K2" "SYNJ2" "FIG4" "PIKFYVE"
[61] "SYNJ1" "PLCH1" "PLCH2" "PI4KB" "PLCD3" "PLCD4" "PLCD1"
gene_list_B
$bupropion
[1] "CHRNA1" "CHRNA3" "CHRNA4" "CHRNB1" "CHRNB2" "CHRNB4" "CHRND" "CHRNG" "CYP2C19" "SLC6A2"
[11] "SLC6A3"
$dopamine
[1] "ADRA2A" "APEX1" "CA1" "CA12" "CA14" "CA2" "CA3" "CA4" "CA5A"
[10] "CA5B" "CA6" "CA7" "CA9" "DRD1" "DRD2" "DRD3" "DRD4" "EHMT2"
[19] "FEN1" "HIF1A" "HSD17B10" "KDM4E" "MPHOSPH8" "MTNR1A" "MTNR1B" "NFE2L2" "RECQL"
[28] "SLC6A2" "SLC6A3" "TDP1" "TP53"
The lists are large
> length(gene_list_A)
[1] 10362
> length(gene_list_B)
[1] 5145
I process the two lists through the following functions
gs.RNASeq <- 21196
gom.obj <- newGOM(gene_list_A,gene_list_B,gs.RNASeq)
Intersection <- getMatrix(gom.obj, name="pval")
These lines of code work fine when the length of the gene list is small but crashes when the length is large. As in this case length(gene_list_A) = 10362 and length(gene_list_B) = 5145
As a hack, I want to process this in chunks of gene_list_B (and gene_list_A remains constant)
this is my script which I wrote using this thread.
Intersection <- as.data.frame(matrix(NA,nrow = length(gene_list_A), ncol = length(gene_list_B)))
nSym <- length(gene_list_B)
chunkSize <- 10
for(i in 1:ceiling(nSym / chunkSize)){
Intersection[,] <- getMatrix(newGOM(gene_list_A,gene_list_B[((i-1)*chunkSize+1):min(nSym,(i*chunkSize))],gs.RNASeq),name= "pval")
}
dim(Intersection)
The script runs but has a lot of errors
- I am not sure if I am calling and filling "Intersection" the empty data frame correctly in the script.
- If I run a small list as a test run the answer with and without hack are different.
Can anyone please help me in solving this? The script I have written may be wrong so any other changes, suggestions also welcome
Thanks
来源:https://stackoverflow.com/questions/63984981/processing-a-list-in-chunks-and-combining-the-result