Suppose I have a dataframe like this:
ID 0 1 2 3 4 5 6 7 8 ... 81 82 83 84 85 86 87 88 89 90 total day_90
----------
Here is my solution; see the comments in the code:
import numpy as np, pandas as pd
import io
# Sample data (whitespace-separated) used to demonstrate the calculation.
text = """ ID 0 1 2 3 4 5 6 7 8 day_90
0 A 2 21 0 18 3 0 0 0 2 4
1 B 0 20 12 2 0 8 14 23 0 5
2 C 0 38 19 3 1 3 3 7 1 1
3 D 3 0 0 1 0 0 0 0 0 0"""
df = pd.read_csv(io.StringIO(text), engine="python", sep=r"\s+")

# Relabel the day columns with integer names 0..8; "ID" and "day_90" keep
# their string names.
cols = [*range(9)]
df.columns = ["ID", *cols, "day_90"]

# Positional, half-open range [istart, istop) covering day columns 0..8.
istart = df.columns.get_loc(0)
istop = df.columns.get_loc(8) + 1

# Length threshold for the first zero sequence (zz compares with `>`).
lseq = 2
# The function for aggregating: this is the main calculation, 'r' is a row of 'df':
def zz(r, start=None, stop=None, min_len=None):
    """Return the length of the first run of zeros in a row's day columns.

    The inspected window is positional: it begins at ``r.day_90 + start``
    and ends just before ``stop``.  Only the first run of consecutive zeros
    inside that window is considered; later runs are ignored.

    Parameters
    ----------
    r : pandas.Series
        One row of the frame.  Must contain a ``day_90`` entry giving the
        integer offset (relative to ``start``) of the first day to inspect.
    start, stop : int, optional
        Positional bounds of the day columns within ``r``.  When omitted,
        the module-level ``istart`` / ``istop`` are used.
    min_len : int, optional
        A run is reported only when it is strictly longer than this value.
        When omitted, the module-level ``lseq`` is used.

    Returns
    -------
    int or float
        The length of the first zero run if it is longer than ``min_len``;
        otherwise ``numpy.nan`` (also when the window is empty or contains
        no zero at all).
    """
    # Fall back to the script-level settings so that the plain call
    # ``df.agg(zz, axis=1)`` keeps working unchanged.
    if start is None:
        start = istart
    if stop is None:
        stop = istop
    if min_len is None:
        min_len = lseq

    # Day columns starting at the offset stored in 'day_90'.
    window = r.iloc[r["day_90"] + start:stop]
    is_zero = window.eq(0).to_numpy(dtype=bool)

    # An empty window, or one without zeros, has no zero run to report.
    zero_pos = np.flatnonzero(is_zero)
    if zero_pos.size == 0:
        return np.nan

    # The first run spans from the first zero up to the next non-zero value
    # (or to the end of the window).  Measuring it directly avoids labelling
    # runs, where a leading run could share a label with a later one and be
    # merged with it.
    first = int(zero_pos[0])
    breaks = np.flatnonzero(~is_zero[first:])
    run_len = int(breaks[0]) if breaks.size else is_zero.size - first

    return run_len if run_len > min_len else np.nan
#---
# Evaluate zz once per row and store the outcome in a new 'zseq' column.
df["zseq"] = df.aggregate(zz, axis="columns")
ID 0 1 2 3 4 5 6 7 8 day_90 zseq
0 A 2 21 0 18 3 0 0 0 2 4 3.0
1 B 0 20 12 2 0 8 14 23 0 5 NaN
2 C 0 38 19 3 1 3 3 7 1 1 NaN
3 D 3 0 0 1 0 0 0 0 0 0 NaN