Python pandas resampling

前端 未结 1 1631
无人共我
无人共我 2020-12-20 04:03

I have the following dataframe:

    Timestamp    S_time1   S_time2   End_Time_1   End_time_2   Sign_1   Sign_2
0    2413044       0        0           0              


        
相关标签:
1条回答
  • 2020-12-20 04:06

    Input:

    print df
      Timestamp  S_time1  S_time2  End_Time_1  End_time_2 Sign_1 Sign_2
    0    2413044        0        0           0           0      x      x
    1    2422476        0        0           0           0      x      x
    2    2431908        0        0           0           0      x      x
    3    2441341        0        0           0           0      x      x
    4    2541232  2526631  2528631     2520631     2530631     10     80
    5    2560273  2544946  2546496     2546496     2548496     40     80
    6    2577224  2564010  2566010     2566010     2568010   null   null
    7    2592905  2580959  2582959     2582959     2584959   null   null
    

    2 approaches:

    In [231]: %timeit s(df)
    1 loops, best of 3: 2.78 s per loop
    
    In [232]: %timeit m(df)
    1 loops, best of 3: 690 ms per loop
    
    def m(df):
        #resample column Timestamp by 100ms, convert bak to integers 
        df['Timestamp'] = df['Timestamp'].astype('timedelta64[ms]')
        df['i'] = 1
        df = df.set_index('Timestamp')
        df1 = df[[]].resample('100ms', how='first').reset_index()
        df1['Timestamp'] = (df1['Timestamp'] / np.timedelta64(1, 'ms')).astype(int)
        #felper column i for merging
        df1['i'] = 1
        #print df1
    
        out = df1.merge(df,on='i', how='left')
        out1 = out[['Timestamp', 'Sign_1']][(out.Timestamp >= out.S_time1) & (out.Timestamp <= out.End_Time_1)]
        out2 = out[['Timestamp', 'Sign_2']][(out.Timestamp >= out.S_time2) & (out.Timestamp <= out.End_time_2)]
    
        out1 = out1.rename(columns={'Sign_1':'Bin_time'})
        out2 = out2.rename(columns={'Sign_2':'Bin_time'})
    
        df = pd.concat([out1, out2], ignore_index=True).drop_duplicates(subset='Timestamp')
        df1 = df1.set_index('Timestamp')
        df = df.set_index('Timestamp')
        df = df.reindex(df1.index).reset_index()
        #print df.head(10)
    
    def s(df):
        #resample column Timestamp by 100ms, convert bak to integers 
        df['Timestamp'] = df['Timestamp'].astype('timedelta64[ms]')
        df = df.set_index('Timestamp')
        out = df[[]].resample('100ms', how='first')
        out = out.reset_index()
        out['Timestamp'] = (out['Timestamp'] / np.timedelta64(1, 'ms')).astype(int)
        #print out.head(10)
    
        #search start end 
        def search(x):
            mask1 = (df.S_time1<=x['Timestamp']) & (df.End_Time_1>=x['Timestamp'])
            #if at least one True return first value of series
            if mask1.any():
                    return df.loc[mask1].Sign_1[0]
            #check second start and end time
            else:
                    mask2 = (df.S_time2<=x['Timestamp']) & (df.End_time_2>=x['Timestamp'])
                    if mask2.any():
                        #if at least one True return first value
                        return df.loc[mask2].Sign_2[0]
                    else:
                        #if all False return NaN
                        return np.nan
    
        out['Bin_time'] = out.apply(search, axis=1)
        #print out.head(10)
    
    0 讨论(0)
提交回复
热议问题