I have a dataframe df which can be created with this:
data={\'id\':[1,1,1,1,2,2,2,2],
\'date1\':[datetime.date(2016,1,1),datetime.date(201
This is inelegant, but hey, it works! (EDIT: added a second method below.)
# Convert the datetime.date columns to pandas timestamps for easier
# comparisons with the dates in drange.
df['date1'] = pd.to_datetime(df['date1'])
df['date2'] = pd.to_datetime(df['date2'])

# solution
# NOTE(review): drange is not defined in this snippet; it is presumably an
# iterable of daily timestamps (e.g. built with pd.date_range) created
# earlier -- confirm before running.
newdf = pd.DataFrame(data=drange, columns=['usedate'])
# For each usedate ud, select the df rows whose [date1, date2] interval
# contains ud (both ends inclusive). The selection is made once per date and
# reused for both score columns rather than being rebuilt for each column.
active_rows = [df[(df['date1'] <= ud) & (df['date2'] >= ud)] for ud in drange]
newdf['score1sum'] = [rows['score1'].sum() for rows in active_rows]
newdf['score2sum'] = [rows['score2'].sum() for rows in active_rows]
# output
newdf
usedate score1sum score2sum
2016-01-01 8 8
2016-01-02 21 6
2016-01-03 32 13
2016-01-04 30 35
2016-01-05 13 26
Second method, using transform (or apply):
newdf = pd.DataFrame(data=drange, columns=['usedate'])
def sum_scores(d, frame=None):
    """Return the score1/score2 totals of all rows whose date range covers *d*.

    A row covers *d* when ``date1 <= d <= date2`` (both ends inclusive).

    Args:
        d: The date to test; it must be comparable with the ``date1`` and
            ``date2`` columns.
        frame: DataFrame with ``date1``, ``date2``, ``score1`` and ``score2``
            columns. Defaults to the module-level ``df`` so the function can
            still be passed directly to ``Series.transform`` / ``Series.apply``.

    Returns:
        A Series indexed by ``'score1'`` and ``'score2'`` holding the column
        sums of the matching rows.
    """
    if frame is None:
        # Fall back to the notebook-level dataframe, as the one-argument form did.
        frame = df
    covers = (frame['date1'] <= d) & (frame['date2'] >= d)
    return frame.loc[covers, ['score1', 'score2']].sum()
# Evaluate sum_scores for every usedate and store the two resulting totals
# as the score1sum / score2sum columns. apply can be used in place of
# transform here; both were about equally fast when timed.
newdf[['score1sum', 'score2sum']] = (
    newdf['usedate'].transform(sum_scores)
)
# newdf is now the same as the result of the first method
# Jupyter timeit cell magic
%%timeit
newdf['score1sum'] = [df[(df['date1'] <= d) & (df['date2'] >= d)]['score1'].sum() for d in drange]
newdf['score1sum'] = [df[(df['date1'] <= d) & (df['date2'] >= d)]['score2'].sum() for d in drange]
100 loops, best of 3: 10.4 ms per loop
# Jupyter timeit line magic: times the transform-based assignment.
# A line magic only measures the statement written on the same line.
%timeit newdf[['score1sum', 'score2sum']] = newdf['usedate'].transform(sum_scores)
100 loops, best of 3: 8.51 ms per loop