# 背景:生物信息学
# 目的:衡量算法性能
# 手段:计算每个算法的topN个基因出现在已知基准数据集中的比例,得到每个算法的precision
# Benchmark five ranking algorithms: for each one, compute the precision of
# its top-N genes against a reference gene set for N = 10, 12, ..., 50, then
# draw the five precision curves on a single plot.

top_n_max <- 50                        # evaluate the top 50 results
cutoffs <- seq(10, top_n_max, by = 2)  # N values at which precision is recorded

# Read a headerless CSV of ranked results and return its first column
# (the ranked identifiers) as a character vector.
read_ranked_genes <- function(path) {
  ranked <- read.csv(path, header = FALSE)
  as.character(ranked[[1]])
}

# Precision of the top-N entries of `genes` for every N in `cutoffs`.
#
# genes     : character vector, best-ranked first.
# reference : character vector of reference identifiers.
# cutoffs   : positive integers N at which precision is evaluated.
#
# Returns a numeric vector as long as `cutoffs`; element k is
# (number of the first cutoffs[k] genes found in `reference`) / cutoffs[k].
# Stops with an error when `genes` is shorter than the largest cutoff.
precision_at_top_n <- function(genes, reference, cutoffs) {
  if (length(genes) < max(cutoffs)) {
    stop("need at least ", max(cutoffs), " ranked genes, got ",
         length(genes), call. = FALSE)
  }
  # An NA never counts as a hit: drop it from the reference so that
  # `NA %in% reference` cannot become TRUE.
  reference <- reference[!is.na(reference)]
  hits <- cumsum(genes %in% reference)  # running count of reference hits
  hits[cutoffs] / cutoffs
}

E <- read.table("CGC.txt")  # reference data set path
# Every cell of the reference table is a candidate match, whatever its column.
reference_genes <- unlist(lapply(E, as.character), use.names = FALSE)

accuracy_A <- precision_at_top_n(
  read_ranked_genes("算法1结果路径"), reference_genes, cutoffs
)
accuracy_B <- precision_at_top_n(
  read_ranked_genes("算法2结果路径"), reference_genes, cutoffs
)
accuracy_C <- precision_at_top_n(
  read_ranked_genes("算法3结果路径"), reference_genes, cutoffs
)
accuracy_D <- precision_at_top_n(
  read_ranked_genes("算法4结果路径"), reference_genes, cutoffs
)
accuracy_F <- precision_at_top_n(
  read_ranked_genes("算法5结果路径"), reference_genes, cutoffs
)

# Plot: one dashed line with point markers per algorithm.
x <- cutoffs
par(font = 2)
plot(
  x, accuracy_A,
  xlim = c(10, top_n_max), ylim = c(0, 1),
  col = "blue", type = "o", pch = 19, lty = 2,
  xlab = "Top N drivers", ylab = "Precision",
  lwd = 2, font.lab = 2, font = 2, cex.lab = 1.5, cex.axis = 1.5
)
lines(x, accuracy_B, col = "red3", type = "o", pch = 18, lty = 2, lwd = 2)
lines(x, accuracy_C, col = "gray30", type = "o", pch = 17, lty = 2, lwd = 2)
lines(x, accuracy_D, col = "springgreen3", type = "o", pch = 16, lty = 2,
      lwd = 2)
lines(x, accuracy_F, col = "purple", type = "o", pch = 15, lty = 2, lwd = 2)
title("title", cex.main = 2)
legend(
  "topright",
  inset = .02,
  legend = c("Our_method", "SCS", "OncoIMPACT", "DriverNet", "Frequency"),
  col = c("blue", "red3", "gray30", "springgreen3", "purple"),
  lty = c(2, 2, 2, 2, 2),
  pch = c(19, 18, 17, 16, 15),
  lwd = 1,
  cex = 0.6
)