Nested if else statements over a number of columns

后端 未结 4 1044
情深已故
情深已故 2020-12-03 00:07

I have a large data.frame where the first three columns contain information about a marker. The remaining columns are of numeric type for that

4条回答
  •  甜味超标
    2020-12-03 00:53

    This is another possible solution. All of the solutions above are valid.

    My solution is to create a function specific to your case, without using any additional library. It is quite long and could be made more compact, but it is useful to see each step in order to understand how the function works.

    # Example input: three identifying columns (marker, alleleA, alleleB)
    # followed by three numeric columns for each of the X818, X345 and X346
    # column groups.
    olddf <- data.frame(
        marker = c(
            "kgp5209280_chr3_21902067",
            "chr3_21902130_21902131_A_T",
            "chr3_21902134_21902135_T_C"
        ),
        alleleA = c("T", "A", "T"),
        alleleB = c("A", "T", "C"),
        X818   = c(0, 0.8626, 0.6982),
        X818.1 = c(1, 0.1356, 0.2854),
        X818.2 = c(0, 0.0018, 0.0164),
        X345   = c(1, 0.7676, 0.5617),
        X345.1 = c(0, 0.2170, 0.3749),
        X345.2 = c(0, 0.0154, 0.0634),
        X346   = c(0, 0.8626, 0.6982),
        X346.1 = c(1, 0.1356, 0.2854),
        X346.2 = c(0, 0.0018, 0.0164)
    )
    
    
    # Collapse every triplet of numeric columns into a single column.
    #
    # The first three columns of `arguments` are copied through unchanged.
    # The remaining columns are read in consecutive groups of three
    # (4:6, 7:9, 10:12, ...). For each row and each group, the maximum of the
    # three values is kept when it is strictly below `threshold`; otherwise
    # the result is NaN. A group containing a missing value yields NA instead
    # of aborting the whole call.
    #
    # Arguments:
    #   arguments  data.frame with at least three columns; every column after
    #              the third must be numeric (max() is applied to them).
    #   threshold  numeric cut-off, 0.8 by default.
    #
    # Returns a data.frame with one row per input row: the first three
    # columns followed by one column per complete triplet, named after the
    # first column of that triplet. Trailing columns that do not form a
    # complete triplet are ignored. A zero-row input gives a zero-row result
    # with the same columns.
    mergeallele <- function(arguments, threshold = 0.8){
        stopifnot(is.data.frame(arguments), ncol(arguments) >= 3L)
        n <- nrow(arguments)

        # Index of the first column of every complete triplet: 4, 7, 10, ...
        n_groups <- (ncol(arguments) - 3L) %/% 3L
        group_start <- 1L + 3L * seq_len(n_groups)
        keep_cols <- c(1:3, group_start)

        # Nothing to loop over: return the right columns with zero rows
        # (rbind of an empty list would give NULL).
        if (n == 0L){
            return(arguments[0L, keep_cols, drop = FALSE])
        }

        # Creation of a results object as an empty list of length NROW
        # speed for huge data.frame
        new.lst <- vector(mode="list", n)
        # Placeholder names so that rbind lines the per-row frames up
        # identically; the real names are assigned once at the end.
        tmp_names <- paste0("V", seq_along(keep_cols))
        for (i in seq_len(n)){
            marker_row <- arguments[i,]
            # One merged value per triplet for the current row.
            merged <- vapply(group_start, function(first){
                group_max <- as.numeric(max(marker_row[, first:(first + 2L)]))
                # NA/NaN pass through untouched; only a real value that is
                # not below the threshold is replaced by NaN.
                if (is.na(group_max) || group_max < threshold){
                    group_max
                } else {
                    NaN
                }
            }, numeric(1))
            row_values <- c(as.list(marker_row[1:3]), as.list(merged))
            names(row_values) <- tmp_names
            new.lst[[i]] <- as.data.frame(row_values)
        }
        new.df <- do.call("rbind", new.lst)
        names(new.df) <- colnames(arguments)[keep_cols]
        return(new.df)
    }
    
    
    # Run the merge on the example data with the default threshold.
    newdf <- mergeallele(arguments = olddf)
    
                          marker alleleA alleleB   X818   X345   X346
    1   kgp5209280_chr3_21902067       T       A    NaN    NaN    NaN
    2 chr3_21902130_21902131_A_T       A       T    NaN 0.7676    NaN
    3 chr3_21902134_21902135_T_C       T       C 0.6982 0.5617 0.6982
    

    about:

    threshold = 0.8 
    

    You can set your own threshold value (e.g. 0.8) through this argument, which avoids having to change a variable inside the function.

    new.lst <- vector(mode="list", n)
    

    You can create an empty list whose length equals the number of rows of the old data.frame; the elements of the list are then gradually filled with the loop results (much faster). See the speed test from this Blog

提交回复
热议问题