Create a function that adds new rows to a data frame based on parameters given as lists in pandas

前端 未结 1 821
萌比男神i
萌比男神i 2020-12-11 12:35

I have a data frame as shown below. The data always contains exactly one session, i.e. the column 'Session' always has a single unique value.

df:

相关标签:
1条回答
  • 2020-12-11 13:06

    Here is one way to do it

    # function to create the u_cumulative
    def create_u_columns (ser, threshold_ns = 0.8, walkin_rates = None):
        """Build the walk-in-adjusted cumulative column and the walk-in markers.

        The rows are scanned top to bottom. Whenever the (already adjusted)
        value of a row exceeds ``threshold_ns`` a walk-in is booked after that
        row: every later row is lowered by ``1 - rate`` of that walk-in, and the
        row is tagged with the 1-based number of the walk-in.

        Parameters
        ----------
        ser : pandas.Series
            Cumulative no-show values, one per row, in row order.
        threshold_ns : float, default 0.8
            A walk-in is booked after each row whose adjusted value is strictly
            greater than this threshold.
        walkin_rates : sequence of float, optional
            No-show value of the 1st, 2nd, ... walk-in. When omitted, the
            module-level ``walkin_no_show`` list is used (the original
            behaviour of this function).

        Returns
        -------
        pandas.DataFrame
            Indexed like ``ser`` with two columns: ``u_cumulative`` (adjusted
            values) and ``mask_idx`` (0 for "no walk-in after this row",
            otherwise the 1-based walk-in number).

        Raises
        ------
        IndexError
            If a walk-in is needed before the last row but the list of rates
            has no entry left for it.
        """
        # work on a private copy so the caller's series is never modified
        adjusted = ser.to_numpy().copy()
        if adjusted.dtype.kind in 'iub':
            # an in-place subtraction of a float from an integer/bool array is
            # rejected by numpy, so promote such input to float first
            adjusted = adjusted.astype(float)
        n_rows = len(adjusted)
        marker = np.zeros(n_rows, dtype=int)
        rates = walkin_rates
        walkin_id = 0 # 0-based position in the list of rates
        for i in range(n_rows):
            if adjusted[i] > threshold_ns:
                # the last row has no later rows to adjust, so it needs no rate
                if i + 1 < n_rows:
                    if rates is None:
                        # resolved lazily: only needed once a walk-in is booked
                        rates = walkin_no_show
                    try:
                        rate = rates[walkin_id]
                    except IndexError:
                        raise IndexError(
                            'no walk-in rate available for walk-in number '
                            f'{walkin_id + 1}: the walk-in list is too short'
                        ) from None
                    # the walk-in shows up with probability 1 - rate
                    adjusted[i+1:] -= (1 - rate)
                marker[i] = walkin_id + 1
                walkin_id += 1
        # return a dataframe with both columns, aligned on the input index
        return pd.DataFrame({'u_cumulative': adjusted, 'mask_idx': marker}, index=ser.index)
    

    Now define another function overbook_dfs

    def overbook_dfs (df0, walkin_no_show, threshold_p ):
        """Return one overbooked copy of ``df0`` per threshold.

        For every threshold the frame gets a ``u_cumulative`` column and one
        extra ``walkin<k>`` row directly after each row flagged by
        ``create_u_columns``.

        Parameters
        ----------
        df0 : pandas.DataFrame
            Source frame; the columns ``Cumulative_no_show``, ``No_Show`` and
            ``B_ID`` are used. It is not modified.
        walkin_no_show : list of float
            No-show value of the 1st, 2nd, ... walk-in; written to ``No_Show``
            of the inserted rows.
        threshold_p : iterable of float
            Thresholds to evaluate.

        Returns
        -------
        list of pandas.DataFrame
            One frame per threshold, in the order of ``threshold_p``, each with
            a fresh 0..n-1 index.

        Notes
        -----
        ``create_u_columns`` is called without the walk-in list, so the
        cumulative adjustment uses the list that function resolves on its own
        (the module-level ``walkin_no_show``). Pass that same list here to keep
        the inserted rows consistent with the adjustment.
        """
        l_res = [] #for result
        for th_p in threshold_p: #loop on threshold
            # Fresh copy with a positional 0..n-1 index: the "+ 0.5" trick
            # below needs numeric, unique, ordered labels, which an arbitrary
            # index on df0 does not guarantee. The labels are discarded at the
            # end anyway.
            df = df0.reset_index(drop=True)
            df[['u_cumulative','mask_idx']] = create_u_columns(df['Cumulative_no_show'],
                                                               threshold_ns=th_p)
            # rows after which a walk-in has to be inserted
            walkins = df.loc[df['mask_idx'].astype(bool), :].copy()
            # turn the copied rows into the walk-in rows
            walkins['No_Show'] = walkin_no_show[:len(walkins)]
            walkins['B_ID'] = 'walkin'+walkins['mask_idx'].astype(str)
            walkins['u_cumulative'] -= (1 - walkins['No_Show'])
            # label i + 0.5 sorts between row i and row i + 1
            walkins.index = walkins.index + 0.5
            #append the result to a list
            l_res.append(pd.concat([df,walkins])
                           .sort_index()
                           .reset_index(drop=True)
                           .drop('mask_idx', axis=1)
                        )
        return l_res
    

    Finally, use it with the parameters

    # parameters
    # No-show value of the 1st, 2nd, ... walk-in. create_u_columns, as called
    # by overbook_dfs, picks this list up from module scope, so it has to be
    # defined under exactly this name before the call below.
    walkin_no_show = [ 0.3, 0.2, 0.1, 0.4, 0.5, 0.4, 0.2, 0.7, 0.8]
    # Thresholds to evaluate; overbook_dfs returns one frame per entry.
    threshold_p = [0.8, 0.9, 1.0, 1.1]
    
    # call your function
    # `df` is the input frame from the question (not defined in this snippet).
    # The four targets match the four thresholds above, in the same order.
    df_0_8, df_0_9, df_1_0, df_1_1 = overbook_dfs(df, walkin_no_show, threshold_p)
    
    # show the result for threshold 0.9
    print (df_0_9)
           B_ID  No_Show Session  slot_num  Cumulative_no_show  u_cumulative
    0         1      0.4      s1         1                 0.4           0.4
    1         2      0.3      s1         2                 0.7           0.7
    2         3      0.8      s1         3                 1.5           1.5
    3   walkin1      0.3      s1         3                 1.5           0.8
    4         4      0.3      s1         4                 1.8           1.1
    5   walkin2      0.2      s1         4                 1.8           0.3
    6         5      0.6      s1         5                 2.4           0.9
    7         6      0.8      s1         6                 3.2           1.7
    8   walkin3      0.1      s1         6                 3.2           0.8
    9         7      0.9      s1         7                 4.1           1.7
    10  walkin4      0.4      s1         7                 4.1           1.1
    11        8      0.4      s1         8                 4.5           1.5
    12  walkin5      0.5      s1         8                 4.5           1.0
    13        9      0.6      s1         9                 5.1           1.6
    14  walkin6      0.4      s1         9                 5.1           1.0
    

    Note: this fails if the list walkin_no_show has fewer entries than the number of walk-ins that have to be inserted.

    0 讨论(0)
提交回复
热议问题