问题
I have a data frame as shown below
B_ID no_show Session slot_num walkin ns_w c_ns_w c_walkin
1 0.4 S1 1 0.2 0.2 0.2 0.2
2 0.3 S1 2 0.5 -0.2 0.2 0.7
3 0.8 S1 3 0.5 0.3 0.5 1.2
4 0.3 S1 4 0.8 -0.5 0.0 2.0
5 0.6 S1 5 0.4 0.2 0.2 2.4
6 0.8 S1 6 0.2 0.6 0.8 2.6
7 0.9 S1 7 0.1 0.8 1.4 2.7
8 0.4 S1 8 0.5 -0.1 1.3 3.2
9 0.6 S1 9 0.1 0.5 1.8 3.3
12 0.9 S2 1 0.9 0.0 0.0 0.9
13 0.5 S2 2 0.4 0.1 0.1 1.3
14 0.3 S2 3 0.1 0.2 0.3 1.4
15 0.7 S2 4 0.4 0.3 0.6 1.8
20 0.7 S2 5 0.1 0.6 1.2 1.9
16 0.6 S2 6 0.3 0.3 1.5 2.2
17 0.8 S2 7 0.5 0.3 1.8 2.7
19 0.3 S2 8 0.8 -0.5 1.3 3.5
where,
df['ns_w'] = df['no_show'] - df['walkin']
c_ns_w = cumulative of ns_w
df['c_ns_w'] = df.groupby(['Session'])['ns_w'].cumsum()
c_walkin = cumulative of walkin
df['c_walkin'] = df.groupby(['Session'])['walkin'].cumsum()
From the above I would like to calculate two columns called u_c_ns_w and u_c_walkin.
Whenever u_c_walkin > 0.9, create a new row directly below with no_show = 0, walkin = 0 and B_ID = walkin1, walkin2, etc.; all other values are the same as in the row above. The new row's u_c_walkin is the above row's u_c_walkin minus 1, and that reduced value carries forward to the following rows.
At the same time, whenever u_c_ns_w > 0.8, add a new row with B_ID = overbook1, overbook2, etc., no_show = 0.5, walkin = 0 and ns_w = 0.5; all other values are the same as in the row above. The new row's u_c_ns_w is the above row's u_c_ns_w minus 0.5, and that reduced value carries forward to the following rows.
Expected output:
B_ID no_show Session slot_num walkin ns_w c_ns_w c_walkin u_c_walkin u_c_ns_w
1 0.4 S1 1 0.2 0.2 0.2 0.2 0.2 0.2
2 0.3 S1 2 0.5 -0.2 0.2 0.7 0.7 0.2
3 0.8 S1 3 0.5 0.3 0.5 1.2 1.2 0.5
walkin1 0.0 S1 3 0.0 0.3 0.5 1.2 0.2 0.5
4 0.3 S1 4 0.8 -0.5 0.0 2.0 1.0 0.0
walkin2 0.0 S1 4 0.0 -0.5 0.0 2.0 0.0 0.0
5 0.6 S1 5 0.4 0.2 0.2 2.4 0.4 0.2
6 0.8 S1 6 0.2 0.6 0.8 2.6 0.6 0.8
7 0.9 S1 7 0.1 0.8 1.4 2.7 0.7 1.4
overbook1 0.5 S1 7 0.0 0.5 1.4 2.7 0.7 0.9
8 0.4 S1 8 0.5 -0.1 1.3 3.2 1.2 0.8
walkin3 0.0 S1 8 0.0 -0.1 1.3 3.2 0.2 0.8
9 0.6 S1 9 0.1 0.5 1.8 3.3 0.1 1.3
overbook2 0.5 S1 9 0.0 0.5 1.8 3.3 0.1 0.8
12 0.9 S2 1 0.9 0.0 0.0 0.9 0.9 0.0
13 0.5 S2 2 0.4 0.1 0.1 1.3 1.3 0.1
walkin1 0.0 S2 2 0.0 0.1 0.1 1.3 0.3 0.1
14 0.3 S2 3 0.1 0.2 0.3 1.4 0.4 0.3
15 0.7 S2 4 0.4 0.3 0.6 1.8 0.8 0.6
20 0.7 S2 5 0.1 0.6 1.2 1.9 0.9 1.2
overbook1 0.5 S2 5 0.0 0.5 1.2 1.9 0.9 0.7
16 0.6 S2 6 0.3 0.3 1.5 2.2 1.2 1.0
walkin2 0.0 S2 6 0.3 0.3 1.5 2.2 0.2 1.0
overbook2 0.5 S2 6 0.0 0.5 1.5 2.2 0.2 0.5
17 0.8 S2 7 0.5 0.3 1.8 2.7 0.7 0.8
19 0.3 S2 8 0.8 -0.5 1.3 3.5 1.5 0.3
walkin3 0.0 S2 8 0.8 -0.5 1.3 3.5 0.5 0.3
I tried the code below to create the walkin rows, but I am not able to create the overbook rows.
def create_u_columns(ser, threshold=0.9, step=1):
    """Adjust a cumulative series and mark where extra rows must be inserted.

    The values are scanned in order. Each time the (already adjusted) value
    at a position is strictly greater than ``threshold``, ``step`` is
    subtracted from every *later* position and the position is tagged with
    the next sequential id (1, 2, ...). The triggering position itself keeps
    its value; the caller is expected to subtract ``step`` on the inserted
    row it creates from that position.

    Parameters
    ----------
    ser : pandas.Series
        Cumulative values for one group, in row order.
    threshold : float, default 0.9
        A position triggers when its adjusted value is ``> threshold``.
    step : float, default 1
        Amount removed from all later positions on each trigger.

    Returns
    -------
    pandas.DataFrame
        Indexed like ``ser`` with two columns: ``'u_cumulative'`` (adjusted
        values) and ``'mask_idx'`` (0 where nothing triggered, otherwise the
        1-based id of the trigger).
    """
    # Work on a private float copy: ``to_numpy()`` without ``copy=True`` may
    # return a view, so the in-place subtraction below would otherwise write
    # through to the caller's data (or fail on a read-only array).
    arr_ns = ser.to_numpy(dtype=float, copy=True)
    # 0 means "no row to insert after this position".
    arr_idx = np.zeros(len(arr_ns), dtype=int)
    walkin_id = 1
    # An index loop is required: each trigger rewrites the tail that later
    # iterations read. The final position is included so that a trigger on
    # the last row is still tagged.
    for i in range(len(arr_ns)):
        if arr_ns[i] > threshold:
            arr_ns[i + 1:] -= step
            arr_idx[i] = walkin_id
            walkin_id += 1
    return pd.DataFrame(
        {'u_cumulative': arr_ns, 'mask_idx': arr_idx},
        index=ser.index,
    )
# Per Session, compute the adjusted cumulative walk-in and the markers of the
# rows after which a walk-in row must be inserted. group_keys=False keeps the
# result on df's original row index (pandas >= 2.0 would otherwise prepend the
# Session key to the index and the assignment below would not align).
df[['u_c_walkin', 'mask_idx']] = (
    df.groupby(['Session'], group_keys=False)['c_walkin'].apply(create_u_columns)
)
# Select the rows that triggered a walk-in (non-zero marker).
df_toAdd = df.loc[df['mask_idx'].astype(bool), :].copy()
# Overwrite the values that differ on the inserted row.
df_toAdd['no_show'] = 0
df_toAdd['walkin'] = 0
# The label goes into B_ID, the identifier column of this frame.
df_toAdd['B_ID'] = 'walkin' + df_toAdd['mask_idx'].astype(str)
df_toAdd['u_c_walkin'] -= 1
# Shift the index by 0.5 so each new row sorts right after its source row.
df_toAdd.index += 0.5
new_df = (
    pd.concat([df, df_toAdd])
    .sort_index()
    .reset_index(drop=True)
    .drop('mask_idx', axis=1)
)
回答1:
You can modify the function this way to do both checks at the same time. Please check that these are exactly the conditions you want to apply for the walkin and overbook dataframes.
def _drain_cumulative(values, threshold, step):
    """Return (adjusted values, trigger ids) for one cumulative column."""
    # np.array copies by default, so the caller's data is never modified.
    adjusted = np.array(values, dtype=float)
    # 0 means "no row to insert after this position".
    ids = np.zeros(len(adjusted), dtype=int)
    next_id = 1
    # Index loop on purpose: each trigger rewrites the tail that the
    # following iterations read.
    for i in range(len(adjusted)):
        if adjusted[i] > threshold:
            adjusted[i + 1:] -= step
            ids[i] = next_id
            next_id += 1
    return adjusted, ids


def create_columns(dfg, walkin_threshold=0.9, walkin_step=1,
                   ns_threshold=0.8, ns_step=0.5):
    """Compute adjusted cumulative columns and insertion markers for a group.

    Both ``dfg['c_walkin']`` and ``dfg['c_ns_w']`` are scanned in row order,
    independently of each other. Whenever the (already adjusted) value at a
    row is strictly greater than its threshold, the corresponding step is
    subtracted from every later row and the row is tagged with the next
    sequential id (1, 2, ...). The triggering row keeps its own value.

    Parameters
    ----------
    dfg : pandas.DataFrame
        One group; must contain the columns ``'c_walkin'`` and ``'c_ns_w'``.
        It is not modified.
    walkin_threshold, walkin_step : float, defaults 0.9 and 1
        Trigger level and subtracted amount for ``'c_walkin'``.
    ns_threshold, ns_step : float, defaults 0.8 and 0.5
        Trigger level and subtracted amount for ``'c_ns_w'``.

    Returns
    -------
    pandas.DataFrame
        Indexed like ``dfg`` with the columns ``'u_c_walkin'``,
        ``'u_c_ns_w'``, ``'mask_idx_walkin'`` and ``'mask_idx_ns'``; the mask
        columns hold 0 where nothing triggered, otherwise the 1-based id.
    """
    arr_walkin, arr_idx_walkin = _drain_cumulative(
        dfg['c_walkin'], walkin_threshold, walkin_step
    )
    arr_ns, arr_idx_ns = _drain_cumulative(
        dfg['c_ns_w'], ns_threshold, ns_step
    )
    return pd.DataFrame(
        {
            'u_c_walkin': arr_walkin,
            'u_c_ns_w': arr_ns,
            'mask_idx_walkin': arr_idx_walkin,
            'mask_idx_ns': arr_idx_ns,
        },
        index=dfg.index,
    )
# Per Session, compute both adjusted cumulative columns and both marker
# columns. group_keys=False keeps the result on df's original row index
# (pandas >= 2.0 would otherwise prepend the Session key to the index and the
# assignment below would not align).
df[['u_c_walkin', 'u_c_ns_w', 'mask_idx_walkin', 'mask_idx_ns']] = (
    df.groupby(['Session'], group_keys=False)[['c_walkin', 'c_ns_w']]
    .apply(create_columns)
)

# Rows that triggered a walk-in (non-zero marker).
df_walkin = df.loc[df['mask_idx_walkin'].astype(bool), :].copy()
# Overwrite the values that differ on the inserted walk-in row.
df_walkin['no_show'] = 0
df_walkin['walkin'] = 0
df_walkin['B_ID'] = 'walkin' + df_walkin['mask_idx_walkin'].astype(str)
df_walkin['u_c_walkin'] -= 1
# Shift the index by 0.2 so the walk-in row sorts right after its source row.
df_walkin.index += 0.2

# Rows that triggered an overbook (non-zero marker).
df_ns = df.loc[df['mask_idx_ns'].astype(bool), :].copy()
# Overwrite the values that differ on the inserted overbook row.
df_ns['no_show'] = 0.5
df_ns['walkin'] = 0
df_ns['ns_w'] = 0.5
df_ns['B_ID'] = 'overbook' + df_ns['mask_idx_ns'].astype(str)
df_ns['u_c_ns_w'] -= 0.5
# When the same source row also produced a walk-in row, the overbook row is
# placed after that walk-in row, so it must carry the already reduced
# u_c_walkin rather than the source row's value.
df_ns.loc[df_ns['mask_idx_walkin'].astype(bool), 'u_c_walkin'] -= 1
# Shift the index by 0.4 so the overbook row sorts after the source row and
# after a walk-in row (offset 0.2) created from the same source row.
df_ns.index += 0.4

new_df = (
    pd.concat([df, df_walkin, df_ns])
    .sort_index()
    .reset_index(drop=True)
    .drop(['mask_idx_walkin', 'mask_idx_ns'], axis=1)
)
and you get:
print (new_df)
B_ID no_show Session slot_num walkin ns_w c_ns_w c_walkin \
0 1 0.4 S1 1 0.2 0.2 0.2 0.2
1 2 0.3 S1 2 0.5 -0.2 0.2 0.7
2 3 0.8 S1 3 0.5 0.3 0.5 1.2
3 walkin1 0.0 S1 3 0.0 0.3 0.5 1.2
4 4 0.3 S1 4 0.8 -0.5 0.0 2.0
5 walkin2 0.0 S1 4 0.0 -0.5 0.0 2.0
6 5 0.6 S1 5 0.4 0.2 0.2 2.4
7 6 0.8 S1 6 0.2 0.6 0.8 2.6
8 7 0.9 S1 7 0.1 0.8 1.4 2.7
9 overbook1 0.5 S1 7 0.0 0.5 1.4 2.7
10 8 0.4 S1 8 0.5 -0.1 1.3 3.2
11 walkin3 0.0 S1 8 0.0 -0.1 1.3 3.2
12 9 0.6 S1 9 0.1 0.5 1.8 3.3
13 overbook2 0.5 S1 9 0.0 0.5 1.8 3.3
14 12 0.9 S2 1 0.9 0.0 0.0 0.9
15 13 0.5 S2 2 0.4 0.1 0.1 1.3
16 walkin1 0.0 S2 2 0.0 0.1 0.1 1.3
17 14 0.3 S2 3 0.1 0.2 0.3 1.4
18 15 0.7 S2 4 0.4 0.3 0.6 1.8
19 20 0.7 S2 5 0.1 0.6 1.2 1.9
20 overbook1 0.5 S2 5 0.0 0.5 1.2 1.9
21 16 0.6 S2 6 0.3 0.3 1.5 2.2
22 walkin2 0.0 S2 6 0.0 0.3 1.5 2.2
23 overbook2 0.5 S2 6 0.0 0.5 1.5 2.2
24 17 0.8 S2 7 0.5 0.3 1.8 2.7
25 19 0.3 S2 8 0.8 -0.5 1.3 3.5
26 walkin3 0.0 S2 8 0.0 -0.5 1.3 3.5
u_c_walkin u_c_ns_w
0 0.2 0.2
1 0.7 0.2
2 1.2 0.5
3 0.2 0.5
4 1.0 0.0
5 0.0 0.0
6 0.4 0.2
7 0.6 0.8
8 0.7 1.4
9 0.7 0.9
10 1.2 0.8
11 0.2 0.8
12 0.3 1.3
13 0.3 0.8
14 0.9 0.0
15 1.3 0.1
16 0.3 0.1
17 0.4 0.3
18 0.8 0.6
19 0.9 1.2
20 0.9 0.7
21 1.2 1.0
22 0.2 1.0
23 1.2 0.5
24 0.7 0.8
25 1.5 0.3
26 0.5 0.3
来源:https://stackoverflow.com/questions/61496685/create-new-rows-based-on-values-of-one-of-the-columns-in-the-above-row-with-spec