Filling data frame with previous row value

前端未结

关注

 7  623

I have a data frame that has 2 columns.

column1 has random numbers in column2 is a place holding column for what i want column3 to look like

  random


                      
              相关标签:


      
      
        
          7条回答        

        
                         				            
            
           
            
                              
                
              
              
                
                  轮回少年        
                
              
                            
                2020-12-29 13:44
              
            
            
                                                                       
Also, unless I'm overlooking something, this seems to work:

DF$state2 <- ave(DF$temp, cumsum(DF$temp), FUN = function(x) x[x != 0])
DF
#       random temp state state2
#1  0.50242337  1.0   1.0    1.0
#2  0.68759406  0.0   1.0    1.0
#3  0.74188374  0.0   1.0    1.0
#4  0.44536403  0.0   1.0    1.0
#5  0.50626137  0.5   0.5    0.5
#6  0.51636498  0.0   0.5    0.5
#7  0.80780471  0.0   0.5    0.5
#8  0.24794844  0.0   0.5    0.5
#9  0.46573337  0.0   0.5    0.5
#10 0.10370515  0.0   0.5    0.5
#11 0.07962587  1.0   1.0    1.0
#12 0.93892894  0.0   1.0    1.0
#13 0.67771302  0.0   1.0    1.0
#14 0.11223162  0.0   1.0    1.0
#15 0.16590718  0.0   1.0    1.0
#16 0.83619527  0.0   1.0    1.0
#17 0.38771300  1.0   1.0    1.0
#18 0.14773708  0.0   1.0    1.0
#19 0.43928154  0.5   0.5    0.5
#20 0.08901350  0.0   0.5    0.5
#21 0.84174743  0.0   0.5    0.5
#22 0.93173871  0.0   0.5    0.5
#23 0.80795517  1.0   1.0    1.0

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  栀梦        
                
              
                            
                2020-12-29 13:51
              
            
            
                                                                       
Simply use a loop with a global variable ,

globalvariable used here is m, r is a dataframe with two columns A and B.

r$B = c(1,NA, NA, NA, 3, NA,6)


m=1

for( i in 1:nrow(r) ){

  if(is.na(r$B[i])==FALSE ){

    m <<- i # please note the assign sign ,  " <<- "
    next()

  } else {

    r$B[i] = r$B[m]

  }

}


After Execution :
r$B = 1 1 1 1 3 3 6
                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  误落风尘        
                
              
                            
                2020-12-29 13:52
              
            
            
                                                                       
Perhaps you can make use of na.locf from the "zoo" package after setting values of "0" to NA. Assuming your data.frame is called "mydf":

mydf$state <- mydf$temp
mydf$state[mydf$state == 0] <- NA

library(zoo)
mydf$state <- na.locf(mydf$state)
#      random temp state
# 1 0.5024234  1.0   1.0
# 2 0.6875941  0.0   1.0
# 3 0.7418837  0.0   1.0
# 4 0.4453640  0.0   1.0
# 5 0.5062614  0.5   0.5
# 6 0.5163650  0.0   0.5




If there were NA values in your original data.frame in the "temp" column, and you wanted to keep them as NA in the newly generated "state" column too, that's easy to take care of. Just add one more line to reintroduce the NA values:

mydf$state[is.na(mydf$temp)] <- NA

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  挽巷        
                
              
                            
                2020-12-29 13:52
              
            
            
                                                                       
I suggest using the run length encoding functions, it's a natural way for dealing with steaks in a data set. Using @Kevin's example vector:

temp = c(1,0,0,0,.5,0,0,0,0,0,1,0,0,0,0,0,1,0,0.5,0,0,0,1)
y <- rle(temp)
#str(y)
#List of 2
# $ lengths: int [1:11] 1 3 1 5 1 5 1 1 1 3 ...
# $ values : num [1:11] 1 0 0.5 0 1 0 1 0 0.5 0 ...
# - attr(*, "class")= chr "rle"


for( i in seq(y$values)[-1] ) {
   if(y$values[i] == 0) {
      y$lengths[i-1] = y$lengths[i] + y$lengths[i-1]
      y$lengths[i] = 0
   }
}

#str(y)
#List of 2
# $ lengths: num [1:11] 4 0 6 0 6 0 2 0 4 0 ...
# $ values : num [1:11] 1 0 0.5 0 1 0 1 0 0.5 0 ...
# - attr(*, "class")= chr "rle"

inverse.rle(y)
#  [1] 1.0 1.0 1.0 1.0 0.5 0.5 0.5 0.5 0.5 0.5 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 0.5
# [20] 0.5 0.5 0.5 1.0

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  借酒劲吻你        
                
              
                            
                2020-12-29 13:56
              
            
            
                                                                       
Inspired by the solution of @Ananda Mahto, this is an adaption of the internal code of na.locf that works directly with 0's instead of NAs. Then you don't need the zoo package and you don't need to do the preprocessing of changing the values to NA. Benchmarktests show that this is about 10 times faster than the original version. 

locf.0 <- function(x) {
  L <- x!=0
  idx <- c(0, which(L))[cumsum(L) + 1]
  return(x[idx])
} 
mydf$state <- locf.0(mydf$temp)

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  情话喂你        
                
              
                            
                2020-12-29 14:02
              
            
            
                                                                       
Here is an interesting way with the Reduce function.

temp = c(1,0,0,0,.5,0,0,0,0,0,1,0,0,0,0,0,1,0,0.5,0,0,0,1)
fill_zero = function(x,y) if(y==0) x else y
state = Reduce(fill_zero, temp, accumulate=TRUE)


If you're worried about speed, you can try Rcpp.

library(Rcpp)
cppFunction('
  NumericVector fill_zeros( NumericVector x ) {
    for( int i=1; i<x.size(); i++ )
     if( x[i]==0 ) x[i] = x[i-1];
    return x;
  }
')
state = fill_zeros(temp)

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
   
          
     1
2
下一页
           
           
        
                                  
        
        
          
            
            
              
              
            
    


                                 
              
            
                          
    

        
         
                验证码
                
                  
                
                
                   看不清?
                
              
                                  
                    
   
                 
             
              提交回复